chenpangpang / transformers
Commit adb5c79f

update all tf.shape and tensor.shape to shape_list

Authored Nov 28, 2019 by thomwolf, committed by Lysandre Debut on Nov 29, 2019
Parent: 1ab8dc44

Showing 13 changed files with 48 additions and 54 deletions.
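For context: shape_list, defined in transformers/modeling_tf_utils.py (see the hunk for that file further down), merges a tensor's static shape with tf.shape, returning a Python list whose entries are plain ints where a dimension is statically known and scalar tensors where it is only known at run time. Plain tensor.shape can contain None for such dimensions, and tf.shape always yields a runtime tensor, which is presumably why the call sites are switched over wholesale in this commit. A minimal sketch of the behaviour, assuming TF 2.x; the helper is copied from the pre-commit version shown below, and the tf.function wrapper is only for illustration:

import tensorflow as tf


def shape_list(x):
    """Copy of the pre-commit helper from transformers/modeling_tf_utils.py."""
    static = x.shape.as_list()   # may contain None for dims unknown at trace time
    dynamic = tf.shape(x)        # 1-D runtime tensor, always fully defined when executed
    return [dynamic[i] if s is None else s for i, s in enumerate(static)]


@tf.function(input_signature=[tf.TensorSpec(shape=[None, 128], dtype=tf.int32)])
def build_mask(input_ids):
    # During tracing the batch dimension is unknown statically.
    print("static shape at trace time:", input_ids.shape.as_list())  # [None, 128]
    dims = shape_list(input_ids)  # [<scalar tensor for the batch dim>, 128]
    return tf.fill(dims, 1)       # a list mixing ints and scalar tensors is accepted here


mask = build_mask(tf.zeros([4, 128], dtype=tf.int32))
print(mask.shape)  # (4, 128) at run time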
templates/adding_a_new_model/modeling_tf_xxx.py    +3  -3
transformers/__init__.py                           +1  -1
transformers/modeling_tf_albert.py                 +11 -16
transformers/modeling_tf_bert.py                   +13 -13
transformers/modeling_tf_ctrl.py                   +1  -1
transformers/modeling_tf_distilbert.py             +4  -4
transformers/modeling_tf_gpt2.py                   +1  -1
transformers/modeling_tf_openai.py                 +1  -1
transformers/modeling_tf_roberta.py                +3  -3
transformers/modeling_tf_transfo_xl.py             +1  -1
transformers/modeling_tf_transfo_xl_utilities.py   +2  -2
transformers/modeling_tf_utils.py                  +1  -1
transformers/modeling_tf_xlnet.py                  +6  -7
templates/adding_a_new_model/modeling_tf_xxx.py

@@ -32,7 +32,7 @@ import numpy as np
 import tensorflow as tf
 
 from .configuration_xxx import XxxConfig
-from .modeling_tf_utils import TFPreTrainedModel, get_initializer
+from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list
 from .file_utils import add_start_docstrings
 
 logger = logging.getLogger(__name__)

@@ -121,9 +121,9 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
             input_ids = inputs
 
         if attention_mask is None:
-            attention_mask = tf.fill(tf.shape(input_ids), 1)
+            attention_mask = tf.fill(shape_list(input_ids), 1)
         if token_type_ids is None:
-            token_type_ids = tf.fill(tf.shape(input_ids), 0)
+            token_type_ids = tf.fill(shape_list(input_ids), 0)
 
         # We create a 3D attention mask from a 2D tensor mask.
         # Sizes are [batch_size, 1, 1, to_seq_length]
transformers/__init__.py

@@ -118,7 +118,7 @@ if is_torch_available():
 # TensorFlow
 if is_tf_available():
-    from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, TFSequenceSummary
+    from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, TFSequenceSummary, shape_list
     from .modeling_tf_auto import (TFAutoModel, TFAutoModelForSequenceClassification,
                                    TFAutoModelForQuestionAnswering, TFAutoModelWithLMHead)
transformers/modeling_tf_albert.py

@@ -16,18 +16,13 @@
 """ TF 2.0 ALBERT model. """
 
 from __future__ import absolute_import, division, print_function, unicode_literals
 
-import json
 import logging
-import math
-import os
-import sys
-from io import open
 
 import numpy as np
 import tensorflow as tf
 
 from .configuration_albert import AlbertConfig
-from .modeling_tf_utils import TFPreTrainedModel, get_initializer
+from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list
 from .modeling_tf_bert import ACT2FN, TFBertSelfAttention
 from .file_utils import add_start_docstrings

@@ -110,9 +105,9 @@ class TFAlbertEmbeddings(tf.keras.layers.Layer):
         input_ids, position_ids, token_type_ids, inputs_embeds = inputs
 
         if input_ids is not None:
-            input_shape = tf.shape(input_ids)
+            input_shape = shape_list(input_ids)
         else:
-            input_shape = tf.shape(inputs_embeds)[:-1]
+            input_shape = shape_list(inputs_embeds)[:-1]
 
         seq_length = input_shape[1]
         if position_ids is None:

@@ -137,8 +132,8 @@ class TFAlbertEmbeddings(tf.keras.layers.Layer):
             Returns:
                 float32 tensor with shape [batch_size, length, vocab_size].
         """
-        batch_size = tf.shape(inputs)[0]
-        length = tf.shape(inputs)[1]
+        batch_size = shape_list(inputs)[0]
+        length = shape_list(inputs)[1]
         x = tf.reshape(inputs, [-1, self.config.embedding_size])
         logits = tf.matmul(x, self.word_embeddings, transpose_b=True)
         return tf.reshape(logits, [batch_size, length, self.config.vocab_size])

@@ -183,7 +178,7 @@ class TFAlbertSelfAttention(tf.keras.layers.Layer):
     def call(self, inputs, training=False):
         hidden_states, attention_mask, head_mask = inputs
 
-        batch_size = tf.shape(hidden_states)[0]
+        batch_size = shape_list(hidden_states)[0]
         mixed_query_layer = self.query(hidden_states)
         mixed_key_layer = self.key(hidden_states)
         mixed_value_layer = self.value(hidden_states)

@@ -196,7 +191,7 @@ class TFAlbertSelfAttention(tf.keras.layers.Layer):
         # (batch size, num_heads, seq_len_q, seq_len_k)
         attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
         # scale attention_scores
-        dk = tf.cast(tf.shape(key_layer)[-1], tf.float32)
+        dk = tf.cast(shape_list(key_layer)[-1], tf.float32)
         attention_scores = attention_scores / tf.math.sqrt(dk)
 
         if attention_mask is not None:

@@ -264,7 +259,7 @@ class TFAlbertAttention(TFBertSelfAttention):
     def call(self, inputs, training=False):
         input_tensor, attention_mask, head_mask = inputs
 
-        batch_size = tf.shape(input_tensor)[0]
+        batch_size = shape_list(input_tensor)[0]
         mixed_query_layer = self.query(input_tensor)
         mixed_key_layer = self.key(input_tensor)
         mixed_value_layer = self.value(input_tensor)

@@ -277,7 +272,7 @@ class TFAlbertAttention(TFBertSelfAttention):
         # (batch size, num_heads, seq_len_q, seq_len_k)
         attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
         # scale attention_scores
-        dk = tf.cast(tf.shape(key_layer)[-1], tf.float32)
+        dk = tf.cast(shape_list(key_layer)[-1], tf.float32)
         attention_scores = attention_scores / tf.math.sqrt(dk)
 
         if attention_mask is not None:

@@ -645,9 +640,9 @@ class TFAlbertModel(TFAlbertPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
-            input_shape = tf.shape(input_ids)
+            input_shape = shape_list(input_ids)
         elif inputs_embeds is not None:
-            input_shape = inputs_embeds.shape[:-1]
+            input_shape = shape_list(inputs_embeds)[:-1]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")
transformers/modeling_tf_bert.py

@@ -28,7 +28,7 @@ import numpy as np
 import tensorflow as tf
 
 from .configuration_bert import BertConfig
-from .modeling_tf_utils import TFPreTrainedModel, get_initializer
+from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list
 from .file_utils import add_start_docstrings
 
 logger = logging.getLogger(__name__)

@@ -145,9 +145,9 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
         input_ids, position_ids, token_type_ids, inputs_embeds = inputs
 
         if input_ids is not None:
-            input_shape = tf.shape(input_ids)
+            input_shape = shape_list(input_ids)
         else:
-            input_shape = tf.shape(inputs_embeds)[:-1]
+            input_shape = shape_list(inputs_embeds)[:-1]
 
         seq_length = input_shape[1]
         if position_ids is None:

@@ -172,8 +172,8 @@ class TFBertEmbeddings(tf.keras.layers.Layer):
             Returns:
                 float32 tensor with shape [batch_size, length, vocab_size].
         """
-        batch_size = tf.shape(inputs)[0]
-        length = tf.shape(inputs)[1]
+        batch_size = shape_list(inputs)[0]
+        length = shape_list(inputs)[1]
         x = tf.reshape(inputs, [-1, self.hidden_size])
         logits = tf.matmul(x, self.word_embeddings, transpose_b=True)

@@ -214,7 +214,7 @@ class TFBertSelfAttention(tf.keras.layers.Layer):
     def call(self, inputs, training=False):
         hidden_states, attention_mask, head_mask = inputs
 
-        batch_size = tf.shape(hidden_states)[0]
+        batch_size = shape_list(hidden_states)[0]
         mixed_query_layer = self.query(hidden_states)
         mixed_key_layer = self.key(hidden_states)
         mixed_value_layer = self.value(hidden_states)

@@ -225,7 +225,7 @@ class TFBertSelfAttention(tf.keras.layers.Layer):
         # Take the dot product between "query" and "key" to get the raw attention scores.
         attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)  # (batch size, num_heads, seq_len_q, seq_len_k)
-        dk = tf.cast(tf.shape(key_layer)[-1], tf.float32)  # scale attention_scores
+        dk = tf.cast(shape_list(key_layer)[-1], tf.float32)  # scale attention_scores
         attention_scores = attention_scores / tf.math.sqrt(dk)
 
         if attention_mask is not None:

@@ -502,9 +502,9 @@ class TFBertMainLayer(tf.keras.layers.Layer):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
-            input_shape = input_ids.shape
+            input_shape = shape_list(input_ids)
         elif inputs_embeds is not None:
-            input_shape = inputs_embeds.shape[:-1]
+            input_shape = shape_list(inputs_embeds)[:-1]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")

@@ -939,11 +939,11 @@ class TFBertForMultipleChoice(TFBertPreTrainedModel):
             input_ids = inputs
 
         if input_ids is not None:
-            num_choices = tf.shape(input_ids)[1]
-            seq_length = tf.shape(input_ids)[2]
+            num_choices = shape_list(input_ids)[1]
+            seq_length = shape_list(input_ids)[2]
         else:
-            num_choices = tf.shape(inputs_embeds)[1]
-            seq_length = tf.shape(inputs_embeds)[2]
+            num_choices = shape_list(inputs_embeds)[1]
+            seq_length = shape_list(inputs_embeds)[2]
 
         flat_input_ids = tf.reshape(input_ids, (-1, seq_length)) if input_ids is not None else None
         flat_attention_mask = tf.reshape(attention_mask, (-1, seq_length)) if attention_mask is not None else None
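The TFBertForMultipleChoice hunk above is a representative call site: with fully defined input shapes, shape_list(input_ids)[1] and [2] come back as plain Python ints, so the subsequent tf.reshape gets a mostly static target shape; when a dimension is only known at run time, the corresponding entry is a scalar tensor and the same code still works. A small sketch with toy shapes (not taken from the repository), reusing the same shape_list helper:

import tensorflow as tf


def shape_list(x):
    static = x.shape.as_list()
    dynamic = tf.shape(x)
    return [dynamic[i] if s is None else s for i, s in enumerate(static)]


# Toy stand-in for (batch_size, num_choices, seq_len) multiple-choice inputs.
input_ids = tf.zeros((2, 4, 16), dtype=tf.int32)

num_choices = shape_list(input_ids)[1]  # 4, a plain Python int here
seq_length = shape_list(input_ids)[2]   # 16, a plain Python int here

# Flatten the choices into the batch dimension, as the changed code does.
flat_input_ids = tf.reshape(input_ids, (-1, seq_length))
print(flat_input_ids.shape)  # (8, 16)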
transformers/modeling_tf_ctrl.py

@@ -95,7 +95,7 @@ class TFMultiHeadAttention(tf.keras.layers.Layer):
     def call(self, inputs, training=False):
         v, k, q, mask, layer_past, attention_mask, head_mask = inputs
-        batch_size = q.shape[0]
+        batch_size = shape_list(q)[0]
 
         q = self.Wq(q)
         k = self.Wk(k)
transformers/modeling_tf_distilbert.py

@@ -137,9 +137,9 @@ class TFEmbeddings(tf.keras.layers.Layer):
         input_ids, position_ids = inputs
 
         if input_ids is not None:
-            seq_length = tf.shape(input_ids)[1]
+            seq_length = shape_list(input_ids)[1]
         else:
-            seq_length = tf.shape(inputs_embeds)[1]
+            seq_length = shape_list(inputs_embeds)[1]
 
         if position_ids is None:
             position_ids = tf.range(seq_length, dtype=tf.int32)[tf.newaxis, :]

@@ -160,8 +160,8 @@ class TFEmbeddings(tf.keras.layers.Layer):
             Returns:
                 float32 tensor with shape [batch_size, length, vocab_size].
         """
-        batch_size = tf.shape(inputs)[0]
-        length = tf.shape(inputs)[1]
+        batch_size = shape_list(inputs)[0]
+        length = shape_list(inputs)[1]
         x = tf.reshape(inputs, [-1, self.dim])
         logits = tf.matmul(x, self.word_embeddings, transpose_b=True)
transformers/modeling_tf_gpt2.py

@@ -92,7 +92,7 @@ class TFAttention(tf.keras.layers.Layer):
         # q, k, v have shape [batch, heads, sequence, features]
         w = tf.matmul(q, k, transpose_b=True)
         if self.scale:
-            dk = tf.cast(tf.shape(k)[-1], tf.float32)  # scale attention_scores
+            dk = tf.cast(shape_list(k)[-1], tf.float32)  # scale attention_scores
             w = w / tf.math.sqrt(dk)
 
         # w has shape [batch, heads, dst_sequence, src_sequence], where information flows from src to dst.
transformers/modeling_tf_openai.py

@@ -98,7 +98,7 @@ class TFAttention(tf.keras.layers.Layer):
         # q, k, v have shape [batch, heads, sequence, features]
         w = tf.matmul(q, k, transpose_b=True)
         if self.scale:
-            dk = tf.cast(tf.shape(k)[-1], tf.float32)  # scale attention_scores
+            dk = tf.cast(shape_list(k)[-1], tf.float32)  # scale attention_scores
             w = w / tf.math.sqrt(dk)
 
         # w has shape [batch, heads, dst_sequence, src_sequence], where information flows from src to dst.
transformers/modeling_tf_roberta.py

@@ -24,7 +24,7 @@ import numpy as np
 import tensorflow as tf
 
 from .configuration_roberta import RobertaConfig
-from .modeling_tf_utils import TFPreTrainedModel, get_initializer
+from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list
 from .file_utils import add_start_docstrings
 from .modeling_tf_bert import TFBertEmbeddings, TFBertMainLayer, gelu, gelu_new

@@ -51,9 +51,9 @@ class TFRobertaEmbeddings(TFBertEmbeddings):
         input_ids, position_ids, token_type_ids, inputs_embeds = inputs
 
         if input_ids is not None:
-            seq_length = tf.shape(input_ids)[1]
+            seq_length = shape_list(input_ids)[1]
         else:
-            seq_length = tf.shape(inputs_embeds)[1]
+            seq_length = shape_list(inputs_embeds)[1]
 
         if position_ids is None:
             position_ids = tf.range(self.padding_idx + 1, seq_length + self.padding_idx + 1, dtype=tf.int32)[tf.newaxis, :]
transformers/modeling_tf_transfo_xl.py

@@ -337,7 +337,7 @@ class TFAdaptiveEmbedding(tf.keras.layers.Layer):
                 emb_i = tf.einsum('id,de->ie', emb_i, self.emb_projs[i])
 
                 mask_idx = tf.cast(tf.where(mask_i), dtype=tf.int64)
-                emb_flat += tf.scatter_nd(mask_idx, emb_i, tf.cast(tf.shape(emb_flat), dtype=tf.int64))
+                emb_flat += tf.scatter_nd(mask_idx, emb_i, tf.cast(shape_list(emb_flat), dtype=tf.int64))
 
             embed_shape = shape_list(inp) + [self.d_proj]
             embed = tf.reshape(emb_flat, embed_shape)
transformers/modeling_tf_transfo_xl_utilities.py

@@ -105,7 +105,7 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
     @staticmethod
     def _gather_logprob(logprob, target):
-        lp_size = tf.shape(logprob)
+        lp_size = shape_list(logprob)
        r = tf.range(lp_size[0])
        idx = tf.stack([r, target], 1)
        return tf.gather_nd(logprob, idx)

@@ -159,7 +159,7 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
                     cur_logprob = self._gather_logprob(cur_tail_logprob, cur_target)
                     cur_logprob += cur_head_logprob[:, self.cutoff_ends[1] + i - 1]
                 if target is not None:
-                    loss += tf.scatter_nd(mask_idx, -cur_logprob, tf.cast(tf.shape(loss), dtype=tf.int64))
+                    loss += tf.scatter_nd(mask_idx, -cur_logprob, tf.cast(shape_list(loss), dtype=tf.int64))
             out = tf.concat(out, axis=-1)
 
             if target is not None:
transformers/modeling_tf_utils.py

@@ -494,7 +494,7 @@ class TFSequenceSummary(tf.keras.layers.Layer):
 def shape_list(x):
     """Deal with dynamic shape in tensorflow cleanly."""
     static = x.shape.as_list()
-    dynamic = tf.shape(x)
+    dynamic = shape_list(x)
     return [dynamic[i] if s is None else s for i, s in enumerate(static)]
 
 
 def get_initializer(initializer_range=0.02):
transformers/modeling_tf_xlnet.py

@@ -112,8 +112,7 @@ class TFXLNetRelativeAttention(tf.keras.layers.Layer):
     def prune_heads(self, heads):
         raise NotImplementedError
 
-    @staticmethod
-    def rel_shift(x, klen=-1):
+    def rel_shift(self, x, klen=-1):
         """perform relative shift to form the relative attention score."""
         x_size = shape_list(x)

@@ -135,7 +134,7 @@ class TFXLNetRelativeAttention(tf.keras.layers.Layer):
         # position based attention score
         bd = tf.einsum('ibnd,jbnd->ijbn', q_head + self.r_r_bias, k_head_r)
-        bd = self.rel_shift(bd, klen=ac.shape[1])
+        bd = self.rel_shift(bd, klen=shape_list(ac)[1])
 
         # segment based attention score
         if seg_mat is None:

@@ -192,7 +191,7 @@ class TFXLNetRelativeAttention(tf.keras.layers.Layer):
         if g is not None:
             ###### Two-stream attention with relative positional encoding.
             # content based attention score
-            if mems is not None and mems.shape.ndims > 1:
+            if mems is not None and len(shape_list(mems)) > 1:
                 cat = tf.concat([mems, h], axis=0)
             else:
                 cat = h

@@ -252,7 +251,7 @@ class TFXLNetRelativeAttention(tf.keras.layers.Layer):
         else:
             ###### Multi-head attention with relative positional encoding
-            if mems is not None and mems.shape.ndims > 1:
+            if mems is not None and len(shape_list(mems)) > 1:
                 cat = tf.concat([mems, h], axis=0)
             else:
                 cat = h

@@ -565,7 +564,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
         if data_mask is not None:
             # all mems can be attended to
-            mems_mask = tf.zeros([tf.shape(data_mask)[0], mlen, bsz],
+            mems_mask = tf.zeros([shape_list(data_mask)[0], mlen, bsz],
                                  dtype=dtype_float)
             data_mask = tf.concat([mems_mask, data_mask], axis=1)
 
         if attn_mask is None:

@@ -590,7 +589,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
             word_emb_k = self.word_embedding(input_ids)
         output_h = self.dropout(word_emb_k, training=training)
         if target_mapping is not None:
-            word_emb_q = tf.tile(self.mask_emb, [tf.shape(target_mapping)[0], bsz, 1])
+            word_emb_q = tf.tile(self.mask_emb, [shape_list(target_mapping)[0], bsz, 1])
         # else:  # We removed the inp_q input which was same as target mapping
         #     inp_q_ext = inp_q[:, :, None]
         #     word_emb_q = inp_q_ext * self.mask_emb + (1 - inp_q_ext) * word_emb_k
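The XLNet hunks also replace mems.shape.ndims with len(shape_list(mems)): because shape_list returns one entry per axis, its length equals the tensor's rank whether the individual dimensions are static or dynamic. A minimal illustration with a toy tensor, again assuming the same shape_list helper:

import tensorflow as tf


def shape_list(x):
    static = x.shape.as_list()
    dynamic = tf.shape(x)
    return [dynamic[i] if s is None else s for i, s in enumerate(static)]


mems = tf.zeros([5, 2, 768])     # toy (mem_len, batch, hidden) tensor
print(mems.shape.ndims)          # 3
print(len(shape_list(mems)))     # 3, the same rank check expressed via shape_list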