Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
715fa638
Commit
715fa638
authored
Jan 14, 2020
by
Julien Chaumond
Browse files
Merge branch 'master' into from_scratch_training
parents
764f836d
100e3b6f
Changes
27
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
89 additions
and
59 deletions
+89
-59
src/transformers/file_utils.py
src/transformers/file_utils.py
+8
-11
src/transformers/modeling_albert.py
src/transformers/modeling_albert.py
+3
-0
src/transformers/modeling_bert.py
src/transformers/modeling_bert.py
+3
-0
src/transformers/modeling_roberta.py
src/transformers/modeling_roberta.py
+3
-0
src/transformers/modeling_t5.py
src/transformers/modeling_t5.py
+1
-0
src/transformers/pipelines.py
src/transformers/pipelines.py
+64
-48
tests/test_modeling_common.py
tests/test_modeling_common.py
+7
-0
No files found.
src/transformers/file_utils.py
View file @
715fa638
...
@@ -384,16 +384,13 @@ def get_from_cache(
...
@@ -384,16 +384,13 @@ def get_from_cache(
else
:
else
:
http_get
(
url
,
temp_file
,
proxies
=
proxies
,
resume_size
=
resume_size
,
user_agent
=
user_agent
)
http_get
(
url
,
temp_file
,
proxies
=
proxies
,
resume_size
=
resume_size
,
user_agent
=
user_agent
)
# we are copying the file before closing it, so flush to avoid truncation
logger
.
info
(
"storing %s in cache at %s"
,
url
,
cache_path
)
temp_file
.
flush
()
os
.
rename
(
temp_file
.
name
,
cache_path
)
logger
.
info
(
"storing %s in cache at %s"
,
url
,
cache_path
)
logger
.
info
(
"creating metadata file for %s"
,
cache_path
)
os
.
rename
(
temp_file
.
name
,
cache_path
)
meta
=
{
"url"
:
url
,
"etag"
:
etag
}
meta_path
=
cache_path
+
".json"
logger
.
info
(
"creating metadata file for %s"
,
cache_path
)
with
open
(
meta_path
,
"w"
)
as
meta_file
:
meta
=
{
"url"
:
url
,
"etag"
:
etag
}
json
.
dump
(
meta
,
meta_file
)
meta_path
=
cache_path
+
".json"
with
open
(
meta_path
,
"w"
)
as
meta_file
:
json
.
dump
(
meta
,
meta_file
)
return
cache_path
return
cache_path
src/transformers/modeling_albert.py
View file @
715fa638
...
@@ -579,6 +579,9 @@ class AlbertMLMHead(nn.Module):
...
@@ -579,6 +579,9 @@ class AlbertMLMHead(nn.Module):
self
.
decoder
=
nn
.
Linear
(
config
.
embedding_size
,
config
.
vocab_size
)
self
.
decoder
=
nn
.
Linear
(
config
.
embedding_size
,
config
.
vocab_size
)
self
.
activation
=
ACT2FN
[
config
.
hidden_act
]
self
.
activation
=
ACT2FN
[
config
.
hidden_act
]
# Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
self
.
decoder
.
bias
=
self
.
bias
def
forward
(
self
,
hidden_states
):
def
forward
(
self
,
hidden_states
):
hidden_states
=
self
.
dense
(
hidden_states
)
hidden_states
=
self
.
dense
(
hidden_states
)
hidden_states
=
self
.
activation
(
hidden_states
)
hidden_states
=
self
.
activation
(
hidden_states
)
...
...
src/transformers/modeling_bert.py
View file @
715fa638
...
@@ -481,6 +481,9 @@ class BertLMPredictionHead(nn.Module):
...
@@ -481,6 +481,9 @@ class BertLMPredictionHead(nn.Module):
self
.
bias
=
nn
.
Parameter
(
torch
.
zeros
(
config
.
vocab_size
))
self
.
bias
=
nn
.
Parameter
(
torch
.
zeros
(
config
.
vocab_size
))
# Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
self
.
decoder
.
bias
=
self
.
bias
def
forward
(
self
,
hidden_states
):
def
forward
(
self
,
hidden_states
):
hidden_states
=
self
.
transform
(
hidden_states
)
hidden_states
=
self
.
transform
(
hidden_states
)
hidden_states
=
self
.
decoder
(
hidden_states
)
+
self
.
bias
hidden_states
=
self
.
decoder
(
hidden_states
)
+
self
.
bias
...
...
src/transformers/modeling_roberta.py
View file @
715fa638
...
@@ -306,6 +306,9 @@ class RobertaLMHead(nn.Module):
...
@@ -306,6 +306,9 @@ class RobertaLMHead(nn.Module):
self
.
decoder
=
nn
.
Linear
(
config
.
hidden_size
,
config
.
vocab_size
,
bias
=
False
)
self
.
decoder
=
nn
.
Linear
(
config
.
hidden_size
,
config
.
vocab_size
,
bias
=
False
)
self
.
bias
=
nn
.
Parameter
(
torch
.
zeros
(
config
.
vocab_size
))
self
.
bias
=
nn
.
Parameter
(
torch
.
zeros
(
config
.
vocab_size
))
# Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
self
.
decoder
.
bias
=
self
.
bias
def
forward
(
self
,
features
,
**
kwargs
):
def
forward
(
self
,
features
,
**
kwargs
):
x
=
self
.
dense
(
features
)
x
=
self
.
dense
(
features
)
x
=
gelu
(
x
)
x
=
gelu
(
x
)
...
...
src/transformers/modeling_t5.py
View file @
715fa638
...
@@ -286,6 +286,7 @@ class T5Attention(nn.Module):
...
@@ -286,6 +286,7 @@ class T5Attention(nn.Module):
bidirectional
=
not
self
.
is_decoder
,
bidirectional
=
not
self
.
is_decoder
,
num_buckets
=
self
.
relative_attention_num_buckets
,
num_buckets
=
self
.
relative_attention_num_buckets
,
)
)
rp_bucket
=
rp_bucket
.
to
(
self
.
relative_attention_bias
.
weight
.
device
)
values
=
self
.
relative_attention_bias
(
rp_bucket
)
# shape (qlen, klen, num_heads)
values
=
self
.
relative_attention_bias
(
rp_bucket
)
# shape (qlen, klen, num_heads)
values
=
values
.
permute
([
2
,
0
,
1
]).
unsqueeze
(
0
)
# shape (1, num_heads, qlen, klen)
values
=
values
.
permute
([
2
,
0
,
1
]).
unsqueeze
(
0
)
# shape (1, num_heads, qlen, klen)
return
values
return
values
...
...
src/transformers/pipelines.py
View file @
715fa638
...
@@ -705,55 +705,71 @@ class QuestionAnsweringPipeline(Pipeline):
...
@@ -705,55 +705,71 @@ class QuestionAnsweringPipeline(Pipeline):
# Convert inputs to features
# Convert inputs to features
examples
=
self
.
_args_parser
(
*
texts
,
**
kwargs
)
examples
=
self
.
_args_parser
(
*
texts
,
**
kwargs
)
features
=
squad_convert_examples_to_features
(
features_list
=
[
examples
,
self
.
tokenizer
,
kwargs
[
"max_seq_len"
],
kwargs
[
"doc_stride"
],
kwargs
[
"max_question_len"
],
False
squad_convert_examples_to_features
(
)
[
example
],
fw_args
=
self
.
inputs_for_model
([
f
.
__dict__
for
f
in
features
])
self
.
tokenizer
,
kwargs
[
"max_seq_len"
],
kwargs
[
"doc_stride"
],
kwargs
[
"max_question_len"
],
False
,
)
for
example
in
examples
]
all_answers
=
[]
for
features
,
example
in
zip
(
features_list
,
examples
):
fw_args
=
self
.
inputs_for_model
([
f
.
__dict__
for
f
in
features
])
# Manage tensor allocation on correct device
# Manage tensor allocation on correct device
with
self
.
device_placement
():
with
self
.
device_placement
():
if
self
.
framework
==
"tf"
:
if
self
.
framework
==
"tf"
:
fw_args
=
{
k
:
tf
.
constant
(
v
)
for
(
k
,
v
)
in
fw_args
.
items
()}
fw_args
=
{
k
:
tf
.
constant
(
v
)
for
(
k
,
v
)
in
fw_args
.
items
()}
start
,
end
=
self
.
model
(
fw_args
)
start
,
end
=
self
.
model
(
fw_args
)
start
,
end
=
start
.
numpy
(),
end
.
numpy
()
start
,
end
=
start
.
numpy
(),
end
.
numpy
()
else
:
else
:
with
torch
.
no_grad
():
with
torch
.
no_grad
():
# Retrieve the score for the context tokens only (removing question tokens)
# Retrieve the score for the context tokens only (removing question tokens)
fw_args
=
{
k
:
torch
.
tensor
(
v
,
device
=
self
.
device
)
for
(
k
,
v
)
in
fw_args
.
items
()}
fw_args
=
{
k
:
torch
.
tensor
(
v
,
device
=
self
.
device
)
for
(
k
,
v
)
in
fw_args
.
items
()}
start
,
end
=
self
.
model
(
**
fw_args
)
start
,
end
=
self
.
model
(
**
fw_args
)
start
,
end
=
start
.
cpu
().
numpy
(),
end
.
cpu
().
numpy
()
start
,
end
=
start
.
cpu
().
numpy
(),
end
.
cpu
().
numpy
()
answers
=
[]
answers
=
[]
for
(
example
,
feature
,
start_
,
end_
)
in
zip
(
examples
,
features
,
start
,
end
):
for
(
feature
,
start_
,
end_
)
in
zip
(
features
,
start
,
end
):
# Normalize logits and spans to retrieve the answer
# Normalize logits and spans to retrieve the answer
start_
=
np
.
exp
(
start_
)
/
np
.
sum
(
np
.
exp
(
start_
))
start_
=
np
.
exp
(
start_
)
/
np
.
sum
(
np
.
exp
(
start_
))
end_
=
np
.
exp
(
end_
)
/
np
.
sum
(
np
.
exp
(
end_
))
end_
=
np
.
exp
(
end_
)
/
np
.
sum
(
np
.
exp
(
end_
))
# Mask padding and question
# Mask padding and question
start_
,
end_
=
start_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
),
end_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
)
start_
,
end_
=
(
start_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
),
# TODO : What happens if not possible
end_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
),
# Mask CLS
)
start_
[
0
]
=
end_
[
0
]
=
0
# TODO : What happens if not possible
starts
,
ends
,
scores
=
self
.
decode
(
start_
,
end_
,
kwargs
[
"topk"
],
kwargs
[
"max_answer_len"
])
# Mask CLS
char_to_word
=
np
.
array
(
example
.
char_to_word_offset
)
start_
[
0
]
=
end_
[
0
]
=
0
# Convert the answer (tokens) back to the original text
starts
,
ends
,
scores
=
self
.
decode
(
start_
,
end_
,
kwargs
[
"topk"
],
kwargs
[
"max_answer_len"
])
answers
+=
[
char_to_word
=
np
.
array
(
example
.
char_to_word_offset
)
{
"score"
:
score
.
item
(),
# Convert the answer (tokens) back to the original text
"start"
:
np
.
where
(
char_to_word
==
feature
.
token_to_orig_map
[
s
])[
0
][
0
].
item
(),
answers
+=
[
"end"
:
np
.
where
(
char_to_word
==
feature
.
token_to_orig_map
[
e
])[
0
][
-
1
].
item
(),
{
"answer"
:
" "
.
join
(
"score"
:
score
.
item
(),
example
.
doc_tokens
[
feature
.
token_to_orig_map
[
s
]
:
feature
.
token_to_orig_map
[
e
]
+
1
]
"start"
:
np
.
where
(
char_to_word
==
feature
.
token_to_orig_map
[
s
])[
0
][
0
].
item
(),
),
"end"
:
np
.
where
(
char_to_word
==
feature
.
token_to_orig_map
[
e
])[
0
][
-
1
].
item
(),
}
"answer"
:
" "
.
join
(
for
s
,
e
,
score
in
zip
(
starts
,
ends
,
scores
)
example
.
doc_tokens
[
feature
.
token_to_orig_map
[
s
]
:
feature
.
token_to_orig_map
[
e
]
+
1
]
]
),
if
len
(
answers
)
==
1
:
}
return
answers
[
0
]
for
s
,
e
,
score
in
zip
(
starts
,
ends
,
scores
)
return
answers
]
answers
=
sorted
(
answers
,
key
=
lambda
x
:
x
[
"score"
],
reverse
=
True
)[:
kwargs
[
"topk"
]]
all_answers
+=
answers
if
len
(
all_answers
)
==
1
:
return
all_answers
[
0
]
return
all_answers
def
decode
(
self
,
start
:
np
.
ndarray
,
end
:
np
.
ndarray
,
topk
:
int
,
max_answer_len
:
int
)
->
Tuple
:
def
decode
(
self
,
start
:
np
.
ndarray
,
end
:
np
.
ndarray
,
topk
:
int
,
max_answer_len
:
int
)
->
Tuple
:
"""
"""
...
...
tests/test_modeling_common.py
View file @
715fa638
...
@@ -485,6 +485,8 @@ class ModelTesterMixin:
...
@@ -485,6 +485,8 @@ class ModelTesterMixin:
self
.
assertEqual
(
model
.
config
.
vocab_size
,
model_vocab_size
+
10
)
self
.
assertEqual
(
model
.
config
.
vocab_size
,
model_vocab_size
+
10
)
# Check that it actually resizes the embeddings matrix
# Check that it actually resizes the embeddings matrix
self
.
assertEqual
(
model_embed
.
weight
.
shape
[
0
],
cloned_embeddings
.
shape
[
0
]
+
10
)
self
.
assertEqual
(
model_embed
.
weight
.
shape
[
0
],
cloned_embeddings
.
shape
[
0
]
+
10
)
# Check that the model can still do a forward pass successfully (every parameter should be resized)
model
(
**
inputs_dict
)
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
model_embed
=
model
.
resize_token_embeddings
(
model_vocab_size
-
15
)
model_embed
=
model
.
resize_token_embeddings
(
model_vocab_size
-
15
)
...
@@ -492,6 +494,11 @@ class ModelTesterMixin:
...
@@ -492,6 +494,11 @@ class ModelTesterMixin:
# Check that it actually resizes the embeddings matrix
# Check that it actually resizes the embeddings matrix
self
.
assertEqual
(
model_embed
.
weight
.
shape
[
0
],
cloned_embeddings
.
shape
[
0
]
-
15
)
self
.
assertEqual
(
model_embed
.
weight
.
shape
[
0
],
cloned_embeddings
.
shape
[
0
]
-
15
)
# Check that the model can still do a forward pass successfully (every parameter should be resized)
# Input ids should be clamped to the maximum size of the vocabulary
inputs_dict
[
"input_ids"
].
clamp_
(
max
=
model_vocab_size
-
15
-
1
)
model
(
**
inputs_dict
)
# Check that adding and removing tokens has not modified the first part of the embedding matrix.
# Check that adding and removing tokens has not modified the first part of the embedding matrix.
models_equal
=
True
models_equal
=
True
for
p1
,
p2
in
zip
(
cloned_embeddings
,
model_embed
.
weight
):
for
p1
,
p2
in
zip
(
cloned_embeddings
,
model_embed
.
weight
):
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment