chenpangpang / transformers · Commits

Commit 715fa638, authored Jan 14, 2020 by Julien Chaumond
Merge branch 'master' into from_scratch_training
Parents: 764f836d, 100e3b6f

Changes: 27 · Showing 7 changed files with 89 additions and 59 deletions (+89 -59)
src/transformers/file_utils.py        +8   -11
src/transformers/modeling_albert.py   +3   -0
src/transformers/modeling_bert.py     +3   -0
src/transformers/modeling_roberta.py  +3   -0
src/transformers/modeling_t5.py       +1   -0
src/transformers/pipelines.py         +64  -48
tests/test_modeling_common.py         +7   -0
src/transformers/file_utils.py (view file @ 715fa638)

@@ -384,16 +384,13 @@ def get_from_cache(
        else:
            http_get(url, temp_file, proxies=proxies, resume_size=resume_size, user_agent=user_agent)

        # we are copying the file before closing it, so flush to avoid truncation
        temp_file.flush()

        logger.info("storing %s in cache at %s", url, cache_path)
        os.rename(temp_file.name, cache_path)

        logger.info("creating metadata file for %s", cache_path)
        meta = {"url": url, "etag": etag}
        meta_path = cache_path + ".json"
        with open(meta_path, "w") as meta_file:
            json.dump(meta, meta_file)

    return cache_path
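For readers outside the library, here is a minimal standalone sketch of the caching pattern this hunk touches: download into a temporary file, move the finished file into place, then write a .json sidecar with the url/etag metadata. The names fetch_to_cache and fake_download are hypothetical stand-ins, not transformers APIs.

    import json
    import os
    import tempfile


    def fetch_to_cache(url: str, cache_path: str, etag: str = None) -> str:
        # Hypothetical stand-in for the real HTTP download (http_get in the library).
        def fake_download(fileobj):
            fileobj.write(b"payload for " + url.encode())

        # Download into a named temporary file first, so a failed transfer never
        # leaves a half-written file at cache_path.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            fake_download(temp_file)
            temp_file.flush()

        # Move the finished file into the cache and record its origin next to it.
        os.rename(temp_file.name, cache_path)
        meta = {"url": url, "etag": etag}
        with open(cache_path + ".json", "w") as meta_file:
            json.dump(meta, meta_file)
        return cache_path


    print(fetch_to_cache("https://example.com/model.bin", os.path.join(tempfile.gettempdir(), "model.bin")))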
src/transformers/modeling_albert.py (view file @ 715fa638)

@@ -579,6 +579,9 @@ class AlbertMLMHead(nn.Module):
        self.decoder = nn.Linear(config.embedding_size, config.vocab_size)
        self.activation = ACT2FN[config.hidden_act]

        # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
        self.decoder.bias = self.bias

    def forward(self, hidden_states):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.activation(hidden_states)
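The one functional line in this hunk is the self.decoder.bias = self.bias tie. A minimal sketch of what that buys, using a hypothetical ToyMLMHead rather than the real AlbertMLMHead: once the two attributes point at the same nn.Parameter, whatever later resizes the decoder's bias (as resize_token_embeddings does for the output layer) is automatically reflected in the head's standalone bias.

    import torch
    import torch.nn as nn


    class ToyMLMHead(nn.Module):
        """Hypothetical head mirroring the pattern in the patch."""

        def __init__(self, hidden_size: int, vocab_size: int):
            super().__init__()
            self.bias = nn.Parameter(torch.zeros(vocab_size))
            self.decoder = nn.Linear(hidden_size, vocab_size)
            # The tie added by the patch: decoder bias and head bias are one tensor.
            self.decoder.bias = self.bias


    head = ToyMLMHead(hidden_size=8, vocab_size=10)
    assert head.decoder.bias is head.bias  # a single shared Parameter, resized together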
src/transformers/modeling_bert.py (view file @ 715fa638)

@@ -481,6 +481,9 @@ class BertLMPredictionHead(nn.Module):
        self.bias = nn.Parameter(torch.zeros(config.vocab_size))

        # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
        self.decoder.bias = self.bias

    def forward(self, hidden_states):
        hidden_states = self.transform(hidden_states)
        hidden_states = self.decoder(hidden_states) + self.bias
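A hedged end-to-end usage sketch of why the tie matters in practice (the model and tokenizer names are only examples; the flow shown is the transformers 2.x add_tokens / resize_token_embeddings API): without the tie, growing the vocabulary would resize the decoder weight but leave the prediction-head bias at the old size.

    from transformers import BertForMaskedLM, BertTokenizer

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model = BertForMaskedLM.from_pretrained("bert-base-uncased")

    # Add domain-specific tokens, then grow the embeddings and the tied MLM head with them.
    num_added = tokenizer.add_tokens(["<new_token_1>", "<new_token_2>"])
    model.resize_token_embeddings(len(tokenizer))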
src/transformers/modeling_roberta.py (view file @ 715fa638)

@@ -306,6 +306,9 @@ class RobertaLMHead(nn.Module):
        self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
        self.bias = nn.Parameter(torch.zeros(config.vocab_size))

        # Need a link between the two variables so that the bias is correctly resized with `resize_token_embeddings`
        self.decoder.bias = self.bias

    def forward(self, features, **kwargs):
        x = self.dense(features)
        x = gelu(x)
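Note that the RoBERTa head builds its decoder with bias=False and keeps the bias as a separate nn.Parameter; the same tie still works, because assigning a Parameter to the Linear's bias attribute registers it on that module. A small sketch with a plain nn.Linear, not the real RobertaLMHead:

    import torch
    import torch.nn as nn

    decoder = nn.Linear(16, 32, bias=False)  # created without a bias, as in RobertaLMHead
    bias = nn.Parameter(torch.zeros(32))
    decoder.bias = bias                      # registers the shared bias on the Linear
    assert decoder.bias is bias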
src/transformers/modeling_t5.py (view file @ 715fa638)

@@ -286,6 +286,7 @@ class T5Attention(nn.Module):
            bidirectional=not self.is_decoder,
            num_buckets=self.relative_attention_num_buckets,
        )
        rp_bucket = rp_bucket.to(self.relative_attention_bias.weight.device)
        values = self.relative_attention_bias(rp_bucket)  # shape (qlen, klen, num_heads)
        values = values.permute([2, 0, 1]).unsqueeze(0)  # shape (1, num_heads, qlen, klen)
        return values
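The single added line moves rp_bucket onto the device of the relative-attention embedding before the lookup. A self-contained sketch of the same pattern, using a generic nn.Embedding and made-up tensor names rather than the real T5 module:

    import torch
    import torch.nn as nn

    embedding = nn.Embedding(32, 8)              # stands in for relative_attention_bias
    bucket_ids = torch.arange(4).reshape(2, 2)   # stands in for rp_bucket, built on CPU
    # Without this .to(...), a model placed on GPU would hit a device-mismatch error here.
    bucket_ids = bucket_ids.to(embedding.weight.device)
    values = embedding(bucket_ids)               # shape (2, 2, 8)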
src/transformers/pipelines.py (view file @ 715fa638)

@@ -705,55 +705,71 @@ class QuestionAnsweringPipeline(Pipeline):
        # Convert inputs to features
        examples = self._args_parser(*texts, **kwargs)

Removed (features built and the model run in one batch across all examples):

        features = squad_convert_examples_to_features(
            examples, self.tokenizer, kwargs["max_seq_len"], kwargs["doc_stride"], kwargs["max_question_len"], False
        )
        fw_args = self.inputs_for_model([f.__dict__ for f in features])

        # Manage tensor allocation on correct device
        with self.device_placement():
            if self.framework == "tf":
                fw_args = {k: tf.constant(v) for (k, v) in fw_args.items()}
                start, end = self.model(fw_args)
                start, end = start.numpy(), end.numpy()
            else:
                with torch.no_grad():
                    # Retrieve the score for the context tokens only (removing question tokens)
                    fw_args = {k: torch.tensor(v, device=self.device) for (k, v) in fw_args.items()}
                    start, end = self.model(**fw_args)
                    start, end = start.cpu().numpy(), end.cpu().numpy()

        answers = []
        for (example, feature, start_, end_) in zip(examples, features, start, end):
            # Normalize logits and spans to retrieve the answer
            start_ = np.exp(start_) / np.sum(np.exp(start_))
            end_ = np.exp(end_) / np.sum(np.exp(end_))

            # Mask padding and question
            start_, end_ = start_ * np.abs(np.array(feature.p_mask) - 1), end_ * np.abs(np.array(feature.p_mask) - 1)

            # TODO : What happens if not possible
            # Mask CLS
            start_[0] = end_[0] = 0

            starts, ends, scores = self.decode(start_, end_, kwargs["topk"], kwargs["max_answer_len"])
            char_to_word = np.array(example.char_to_word_offset)

            # Convert the answer (tokens) back to the original text
            answers += [
                {
                    "score": score.item(),
                    "start": np.where(char_to_word == feature.token_to_orig_map[s])[0][0].item(),
                    "end": np.where(char_to_word == feature.token_to_orig_map[e])[0][-1].item(),
                    "answer": " ".join(example.doc_tokens[feature.token_to_orig_map[s] : feature.token_to_orig_map[e] + 1]),
                }
                for s, e, score in zip(starts, ends, scores)
            ]

        if len(answers) == 1:
            return answers[0]
        return answers

Added (each example converted and run separately, with answers aggregated into all_answers):

        features_list = [
            squad_convert_examples_to_features(
                [example],
                self.tokenizer,
                kwargs["max_seq_len"],
                kwargs["doc_stride"],
                kwargs["max_question_len"],
                False,
            )
            for example in examples
        ]
        all_answers = []
        for features, example in zip(features_list, examples):
            fw_args = self.inputs_for_model([f.__dict__ for f in features])

            # Manage tensor allocation on correct device
            with self.device_placement():
                if self.framework == "tf":
                    fw_args = {k: tf.constant(v) for (k, v) in fw_args.items()}
                    start, end = self.model(fw_args)
                    start, end = start.numpy(), end.numpy()
                else:
                    with torch.no_grad():
                        # Retrieve the score for the context tokens only (removing question tokens)
                        fw_args = {k: torch.tensor(v, device=self.device) for (k, v) in fw_args.items()}
                        start, end = self.model(**fw_args)
                        start, end = start.cpu().numpy(), end.cpu().numpy()

            answers = []
            for (feature, start_, end_) in zip(features, start, end):
                # Normalize logits and spans to retrieve the answer
                start_ = np.exp(start_) / np.sum(np.exp(start_))
                end_ = np.exp(end_) / np.sum(np.exp(end_))

                # Mask padding and question
                start_, end_ = (
                    start_ * np.abs(np.array(feature.p_mask) - 1),
                    end_ * np.abs(np.array(feature.p_mask) - 1),
                )

                # TODO : What happens if not possible
                # Mask CLS
                start_[0] = end_[0] = 0

                starts, ends, scores = self.decode(start_, end_, kwargs["topk"], kwargs["max_answer_len"])
                char_to_word = np.array(example.char_to_word_offset)

                # Convert the answer (tokens) back to the original text
                answers += [
                    {
                        "score": score.item(),
                        "start": np.where(char_to_word == feature.token_to_orig_map[s])[0][0].item(),
                        "end": np.where(char_to_word == feature.token_to_orig_map[e])[0][-1].item(),
                        "answer": " ".join(example.doc_tokens[feature.token_to_orig_map[s] : feature.token_to_orig_map[e] + 1]),
                    }
                    for s, e, score in zip(starts, ends, scores)
                ]

            answers = sorted(answers, key=lambda x: x["score"], reverse=True)[: kwargs["topk"]]
            all_answers += answers

        if len(all_answers) == 1:
            return all_answers[0]
        return all_answers

Unchanged context following the hunk:

    def decode(self, start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int) -> Tuple:
        """
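A hedged usage sketch of the pipeline this rewrite affects (the pipeline factory and the topk / max_answer_len keyword arguments are the transformers 2.x API visible in the diff; the question and context strings are made up): with the per-example loop, the top-k candidates are selected per question before being returned.

    from transformers import pipeline

    qa = pipeline("question-answering")
    result = qa(
        question="Where is the metadata file written?",
        context="The cache stores each download and writes a .json metadata file next to it.",
        topk=2,
        max_answer_len=15,
    )
    print(result)  # a list of {"score", "start", "end", "answer"} dicts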
tests/test_modeling_common.py (view file @ 715fa638)

@@ -485,6 +485,8 @@ class ModelTesterMixin:
            self.assertEqual(model.config.vocab_size, model_vocab_size + 10)
            # Check that it actually resizes the embeddings matrix
            self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
            # Check that the model can still do a forward pass successfully (every parameter should be resized)
            model(**inputs_dict)

            # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
            model_embed = model.resize_token_embeddings(model_vocab_size - 15)

@@ -492,6 +494,11 @@ class ModelTesterMixin:
            # Check that it actually resizes the embeddings matrix
            self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)

            # Check that the model can still do a forward pass successfully (every parameter should be resized)
            # Input ids should be clamped to the maximum size of the vocabulary
            inputs_dict["input_ids"].clamp_(max=model_vocab_size - 15 - 1)
            model(**inputs_dict)

            # Check that adding and removing tokens has not modified the first part of the embedding matrix.
            models_equal = True
            for p1, p2 in zip(cloned_embeddings, model_embed.weight):
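The new test lines clamp the input ids before the forward pass. A small sketch of why, with hypothetical sizes: after shrinking the vocabulary by 15, any id at or above the new size would index past the end of the resized embedding matrix.

    import torch

    model_vocab_size = 30522            # hypothetical original vocabulary size
    new_vocab_size = model_vocab_size - 15
    input_ids = torch.tensor([[101, 30519, 2023, 102]])

    # Force every id into the valid range of the shrunken embedding matrix.
    input_ids.clamp_(max=new_vocab_size - 1)
    assert int(input_ids.max()) < new_vocab_size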