Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Fairseq
Commits
f6ac1aec
Commit
f6ac1aec
authored
Nov 01, 2017
by
Sergey Edunov
Browse files
Force UTF-8 encoding for dictionary files ( #41 )
parent
bb3be24d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
3 deletions
+7
-3
fairseq/dictionary.py
fairseq/dictionary.py
+7
-3
No files found.
fairseq/dictionary.py
View file @
f6ac1aec
...
@@ -109,8 +109,12 @@ class Dictionary(object):
...
@@ -109,8 +109,12 @@ class Dictionary(object):
"""
"""
if
isinstance
(
f
,
str
):
if
isinstance
(
f
,
str
):
with
open
(
f
,
'r'
)
as
fd
:
try
:
return
Dictionary
.
load
(
fd
)
with
open
(
f
,
'r'
,
encoding
=
'utf-8'
)
as
fd
:
return
Dictionary
.
load
(
fd
)
except
:
raise
Exception
(
"Incorrect encoding detected in {}, please "
"rebuild the dataset"
.
format
(
f
))
d
=
Dictionary
()
d
=
Dictionary
()
for
line
in
f
.
readlines
():
for
line
in
f
.
readlines
():
...
@@ -125,7 +129,7 @@ class Dictionary(object):
...
@@ -125,7 +129,7 @@ class Dictionary(object):
def
save
(
self
,
f
,
threshold
=
3
,
nwords
=-
1
):
def
save
(
self
,
f
,
threshold
=
3
,
nwords
=-
1
):
"""Stores dictionary into a text file"""
"""Stores dictionary into a text file"""
if
isinstance
(
f
,
str
):
if
isinstance
(
f
,
str
):
with
open
(
f
,
'w'
)
as
fd
:
with
open
(
f
,
'w'
,
encoding
=
'utf-8'
)
as
fd
:
return
self
.
save
(
fd
,
threshold
,
nwords
)
return
self
.
save
(
fd
,
threshold
,
nwords
)
cnt
=
0
cnt
=
0
for
i
,
t
in
enumerate
(
zip
(
self
.
symbols
,
self
.
count
)):
for
i
,
t
in
enumerate
(
zip
(
self
.
symbols
,
self
.
count
)):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment