Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Fairseq
Commits
18a6d85c
Commit
18a6d85c
authored
Dec 26, 2017
by
Myle Ott
Browse files
Add explicit dimension to softmax calls
parent
7da4e062
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing 7 changed files with 10 additions and 10 deletions
+10
-10
README.md
README.md
+2
-2
fairseq/criterions/label_smoothed_cross_entropy.py
fairseq/criterions/label_smoothed_cross_entropy.py
+1
-1
fairseq/models/fconv.py
fairseq/models/fconv.py
+3
-3
fairseq/models/lstm.py
fairseq/models/lstm.py
+1
-1
fairseq/sequence_generator.py
fairseq/sequence_generator.py
+1
-1
requirements.txt
requirements.txt
+1
-1
setup.py
setup.py
+1
-1
No files found.
README.md
View file @
18a6d85c
...
@@ -24,8 +24,8 @@ If you use the code in your paper, then please cite it as:
...
@@ -24,8 +24,8 @@ If you use the code in your paper, then please cite it as:
* Python version 3.6
* Python version 3.6
* A [PyTorch installation](http://pytorch.org/)
* A [PyTorch installation](http://pytorch.org/)
Currently fairseq-py requires installing PyTorch from source.
Currently fairseq-py requires PyTorch version >= 0.3.0.
Please follow the instructions here: https://github.com/pytorch/pytorch#from-source.
Please follow the instructions here: https://github.com/pytorch/pytorch#installation.
If you use Docker make sure to increase the shared memory size either with `--ipc=host` or `--shm-size` as command line
If you use Docker make sure to increase the shared memory size either with `--ipc=host` or `--shm-size` as command line
options to `nvidia-docker run`.
options to `nvidia-docker run`.
...
...
fairseq/criterions/label_smoothed_cross_entropy.py
View file @
18a6d85c
...
@@ -57,7 +57,7 @@ class LabelSmoothedCrossEntropyCriterion(FairseqCriterion):
...
@@ -57,7 +57,7 @@ class LabelSmoothedCrossEntropyCriterion(FairseqCriterion):
3) logging outputs to display while training
3) logging outputs to display while training
"""
"""
net_output
=
model
(
**
sample
[
'net_input'
])
net_output
=
model
(
**
sample
[
'net_input'
])
input
=
F
.
log_softmax
(
net_output
.
view
(
-
1
,
net_output
.
size
(
-
1
)))
input
=
F
.
log_softmax
(
net_output
.
view
(
-
1
,
net_output
.
size
(
-
1
))
,
dim
=
1
)
target
=
sample
[
'target'
].
view
(
-
1
)
target
=
sample
[
'target'
].
view
(
-
1
)
loss
=
LabelSmoothedNLLLoss
.
apply
(
input
,
target
,
self
.
eps
,
self
.
padding_idx
,
self
.
weights
)
loss
=
LabelSmoothedNLLLoss
.
apply
(
input
,
target
,
self
.
eps
,
self
.
padding_idx
,
self
.
weights
)
sample_size
=
sample
[
'target'
].
size
(
0
)
if
self
.
args
.
sentence_avg
else
sample
[
'ntokens'
]
sample_size
=
sample
[
'target'
].
size
(
0
)
if
self
.
args
.
sentence_avg
else
sample
[
'ntokens'
]
...
...
fairseq/models/fconv.py
View file @
18a6d85c
...
@@ -87,7 +87,7 @@ class FConvEncoder(FairseqEncoder):
...
@@ -87,7 +87,7 @@ class FConvEncoder(FairseqEncoder):
residual
=
x
if
proj
is
None
else
proj
(
x
)
residual
=
x
if
proj
is
None
else
proj
(
x
)
x
=
F
.
dropout
(
x
,
p
=
self
.
dropout
,
training
=
self
.
training
)
x
=
F
.
dropout
(
x
,
p
=
self
.
dropout
,
training
=
self
.
training
)
x
=
conv
(
x
)
x
=
conv
(
x
)
x
=
F
.
glu
(
x
,
dim
=
-
1
)
x
=
F
.
glu
(
x
,
dim
=
2
)
x
=
(
x
+
residual
)
*
math
.
sqrt
(
0.5
)
x
=
(
x
+
residual
)
*
math
.
sqrt
(
0.5
)
# T x B x C -> B x T x C
# T x B x C -> B x T x C
...
@@ -128,7 +128,7 @@ class AttentionLayer(nn.Module):
...
@@ -128,7 +128,7 @@ class AttentionLayer(nn.Module):
# softmax over last dim
# softmax over last dim
sz
=
x
.
size
()
sz
=
x
.
size
()
x
=
F
.
softmax
(
x
.
view
(
sz
[
0
]
*
sz
[
1
],
sz
[
2
]))
x
=
F
.
softmax
(
x
.
view
(
sz
[
0
]
*
sz
[
1
],
sz
[
2
])
,
dim
=
1
)
x
=
x
.
view
(
sz
)
x
=
x
.
view
(
sz
)
attn_scores
=
x
attn_scores
=
x
...
@@ -234,7 +234,7 @@ class FConvDecoder(FairseqIncrementalDecoder):
...
@@ -234,7 +234,7 @@ class FConvDecoder(FairseqIncrementalDecoder):
x
=
F
.
dropout
(
x
,
p
=
self
.
dropout
,
training
=
self
.
training
)
x
=
F
.
dropout
(
x
,
p
=
self
.
dropout
,
training
=
self
.
training
)
x
=
conv
(
x
)
x
=
conv
(
x
)
x
=
conv
.
remove_future_timesteps
(
x
)
x
=
conv
.
remove_future_timesteps
(
x
)
x
=
F
.
glu
(
x
)
x
=
F
.
glu
(
x
,
dim
=
2
)
# attention
# attention
if
attention
is
not
None
:
if
attention
is
not
None
:
...
...
fairseq/models/lstm.py
View file @
18a6d85c
...
@@ -94,7 +94,7 @@ class AttentionLayer(nn.Module):
...
@@ -94,7 +94,7 @@ class AttentionLayer(nn.Module):
# compute attention
# compute attention
attn_scores
=
(
source_hids
*
x
.
unsqueeze
(
0
)).
sum
(
dim
=
2
)
attn_scores
=
(
source_hids
*
x
.
unsqueeze
(
0
)).
sum
(
dim
=
2
)
attn_scores
=
F
.
softmax
(
attn_scores
.
t
()).
t
()
# srclen x bsz
attn_scores
=
F
.
softmax
(
attn_scores
.
t
()
,
dim
=
1
).
t
()
# srclen x bsz
# sum weighted sources
# sum weighted sources
x
=
(
attn_scores
.
unsqueeze
(
2
)
*
source_hids
).
sum
(
dim
=
0
)
x
=
(
attn_scores
.
unsqueeze
(
2
)
*
source_hids
).
sum
(
dim
=
0
)
...
...
fairseq/sequence_generator.py
View file @
18a6d85c
...
@@ -326,7 +326,7 @@ class SequenceGenerator(object):
...
@@ -326,7 +326,7 @@ class SequenceGenerator(object):
avg_attn
=
None
avg_attn
=
None
for
model
,
encoder_out
in
zip
(
self
.
models
,
encoder_outs
):
for
model
,
encoder_out
in
zip
(
self
.
models
,
encoder_outs
):
decoder_out
,
attn
=
model
.
decoder
(
tokens
,
encoder_out
)
decoder_out
,
attn
=
model
.
decoder
(
tokens
,
encoder_out
)
probs
=
F
.
softmax
(
decoder_out
[:,
-
1
,
:]).
data
probs
=
F
.
softmax
(
decoder_out
[:,
-
1
,
:]
,
dim
=
1
).
data
attn
=
attn
[:,
-
1
,
:].
data
attn
=
attn
[:,
-
1
,
:].
data
if
avg_probs
is
None
or
avg_attn
is
None
:
if
avg_probs
is
None
or
avg_attn
is
None
:
avg_probs
=
probs
avg_probs
=
probs
...
...
requirements.txt
View file @
18a6d85c
cffi
cffi
numpy
numpy
torch
torch>=0.3.0
tqdm
tqdm
setup.py
View file @
18a6d85c
...
@@ -54,7 +54,7 @@ class build_py_hook(build_py):
...
@@ -54,7 +54,7 @@ class build_py_hook(build_py):
setup
(
setup
(
name
=
'fairseq'
,
name
=
'fairseq'
,
version
=
'0.
2
.0'
,
version
=
'0.
3
.0'
,
description
=
'Facebook AI Research Sequence-to-Sequence Toolkit'
,
description
=
'Facebook AI Research Sequence-to-Sequence Toolkit'
,
long_description
=
readme
,
long_description
=
readme
,
license
=
license
,
license
=
license
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment