OpenDAS / Fairseq · Commits

Commit 4973d05a
Authored May 09, 2018 by Myle Ott

Flake8

Parent: e40363d7
Showing 3 changed files with 15 additions and 15 deletions
fairseq/models/fconv.py    +3 -3
fairseq/models/lstm.py     +4 -7
fairseq/utils.py           +8 -5
fairseq/models/fconv.py

@@ -119,7 +119,7 @@ class FConvEncoder(FairseqEncoder):
            self.projections.append(Linear(in_channels, out_channels)
                                    if in_channels != out_channels else None)
            if kernel_size % 2 == 1:
                padding = kernel_size // 2
            else:
                padding = 0
            self.convolutions.append(
...
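For context, the hunk above picks padding = kernel_size // 2 whenever the kernel size is odd, which is the standard way to keep a convolution's output the same length as its input. A small standalone sketch of that property (illustrative only, not part of the commit):

# Sketch: with an odd kernel, padding = kernel_size // 2 preserves sequence length.
import torch
import torch.nn as nn

kernel_size = 3                      # odd, so padding = kernel_size // 2 = 1
conv = nn.Conv1d(in_channels=8, out_channels=8,
                 kernel_size=kernel_size, padding=kernel_size // 2)
x = torch.randn(1, 8, 20)            # (batch, channels, sequence length)
print(conv(x).shape)                 # torch.Size([1, 8, 20]) -- length preserved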
fairseq/models/lstm.py

@@ -102,8 +102,7 @@ class LSTMEncoder(FairseqEncoder):
        self.padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, self.padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(
                embed_dict, self.dictionary, self.embed_tokens)

        self.lstm = LSTM(
            input_size=embed_dim,
...
@@ -195,9 +194,7 @@ class LSTMDecoder(FairseqIncrementalDecoder):
        padding_idx = dictionary.pad()
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
        if embed_dict:
            self.embed_tokens = utils.load_embedding(
                embed_dict, self.dictionary, self.embed_tokens)

        self.layers = nn.ModuleList([
            LSTMCell(encoder_embed_dim + embed_dim if layer == 0 else embed_dim, embed_dim)
...
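For context, the LSTMDecoder hunk above sizes the first LSTMCell for the token embedding concatenated with an encoder-sized context vector, while the upper cells consume the hidden state of the layer below. A rough sketch of stepping such a stack with plain torch.nn modules and made-up dimensions (an assumption for illustration, not part of the commit):

# Sketch: a stack of LSTMCells whose first layer takes a wider input, as in the diff.
import torch
import torch.nn as nn

embed_dim, encoder_embed_dim, num_layers, batch = 16, 16, 2, 4
layers = nn.ModuleList([
    nn.LSTMCell(encoder_embed_dim + embed_dim if layer == 0 else embed_dim, embed_dim)
    for layer in range(num_layers)
])

# One decoding step: layer 0 sees embedding + context, upper layers see the layer below.
x = torch.randn(batch, encoder_embed_dim + embed_dim)
states = [(torch.zeros(batch, embed_dim), torch.zeros(batch, embed_dim)) for _ in layers]
for i, cell in enumerate(layers):
    h, c = cell(x, states[i])
    states[i] = (h, c)
    x = h                             # input to the next layer up
print(x.shape)                        # torch.Size([4, 16])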
fairseq/utils.py

@@ -254,6 +254,7 @@ def print_embed_overlap(embed_dict, vocab_dict):
    overlap = len(embed_keys & vocab_keys)
    print("| Found {}/{} types in embedding file.".format(overlap, len(vocab_dict)))


def parse_embedding(embed_path):
    """Parse embedding text file into a dictionary of word and embedding tensors.
...
@@ -267,12 +268,13 @@ def parse_embedding(embed_path):
"""
"""
embed_dict
=
dict
()
embed_dict
=
dict
()
with
open
(
embed_path
)
as
f_embed
:
with
open
(
embed_path
)
as
f_embed
:
_
=
next
(
f_embed
)
#
skip header
_
=
next
(
f_embed
)
#
skip header
for
line
in
f_embed
:
for
line
in
f_embed
:
pieces
=
line
.
strip
().
split
()
pieces
=
line
.
strip
().
split
()
embed_dict
[
pieces
[
0
]]
=
torch
.
Tensor
([
float
(
weight
)
for
weight
in
pieces
[
1
:]])
embed_dict
[
pieces
[
0
]]
=
torch
.
Tensor
([
float
(
weight
)
for
weight
in
pieces
[
1
:]])
return
embed_dict
return
embed_dict
def
load_embedding
(
embed_dict
,
vocab
,
embedding
):
def
load_embedding
(
embed_dict
,
vocab
,
embedding
):
for
idx
in
range
(
len
(
vocab
)):
for
idx
in
range
(
len
(
vocab
)):
token
=
vocab
[
idx
]
token
=
vocab
[
idx
]
...
@@ -280,6 +282,7 @@ def load_embedding(embed_dict, vocab, embedding):
            embedding.weight.data[idx] = embed_dict[token]
    return embedding


def replace_unk(hypo_str, src_str, alignment, align_dict, unk):
    from fairseq import tokenizer
    # Tokens are strings here
...
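For context on the two helpers touched above: parse_embedding reads a word2vec-style text file (skipping its header line) into a word-to-tensor dict, and load_embedding copies those vectors into the matching rows of an nn.Embedding. A rough usage sketch, not part of the commit; ToyVocab is a hypothetical stand-in for fairseq's Dictionary, which load_embedding only touches via len() and item lookup:

# Sketch: wiring parse_embedding's output into an nn.Embedding via load_embedding.
import torch
import torch.nn as nn
from fairseq import utils

class ToyVocab:
    """Minimal vocab stand-in: index -> token string."""
    def __init__(self, tokens):
        self.tokens = tokens
    def __len__(self):
        return len(self.tokens)
    def __getitem__(self, idx):
        return self.tokens[idx]

vocab = ToyVocab(['<pad>', 'hello', 'world'])
# In real use this dict would come from utils.parse_embedding('path/to/embeddings.txt').
embed_dict = {'hello': torch.randn(8), 'world': torch.randn(8)}
embedding = nn.Embedding(len(vocab), 8, padding_idx=0)
embedding = utils.load_embedding(embed_dict, vocab, embedding)  # rows for known tokens overwritten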