Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
ae88eb88
"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "fb560dcb075497f61880010245192e7e1fdbeca4"
Commit
ae88eb88
authored
Dec 14, 2018
by
thomwolf
Browse files
set encoding to 'utf-8' in calls to open
parent
e1eab59a
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
13 additions
and
11 deletions
+13
-11
examples/extract_features.py
examples/extract_features.py
+1
-1
examples/run_classifier.py
examples/run_classifier.py
+3
-2
examples/run_squad.py
examples/run_squad.py
+2
-2
examples/run_swag.py
examples/run_swag.py
+3
-2
pytorch_pretrained_bert/file_utils.py
pytorch_pretrained_bert/file_utils.py
+1
-1
pytorch_pretrained_bert/modeling.py
pytorch_pretrained_bert/modeling.py
+2
-2
setup.py
setup.py
+1
-1
No files found.
examples/extract_features.py
View file @
ae88eb88
...
@@ -168,7 +168,7 @@ def read_examples(input_file):
...
@@ -168,7 +168,7 @@ def read_examples(input_file):
"""Read a list of `InputExample`s from an input file."""
"""Read a list of `InputExample`s from an input file."""
examples
=
[]
examples
=
[]
unique_id
=
0
unique_id
=
0
with
open
(
input_file
,
"r"
)
as
reader
:
with
open
(
input_file
,
"r"
,
encoding
=
'utf-8'
)
as
reader
:
while
True
:
while
True
:
line
=
reader
.
readline
()
line
=
reader
.
readline
()
if
not
line
:
if
not
line
:
...
...
examples/run_classifier.py
View file @
ae88eb88
...
@@ -91,7 +91,7 @@ class DataProcessor(object):
...
@@ -91,7 +91,7 @@ class DataProcessor(object):
@
classmethod
@
classmethod
def
_read_tsv
(
cls
,
input_file
,
quotechar
=
None
):
def
_read_tsv
(
cls
,
input_file
,
quotechar
=
None
):
"""Reads a tab separated value file."""
"""Reads a tab separated value file."""
with
open
(
input_file
,
"r"
)
as
f
:
with
open
(
input_file
,
"r"
,
encoding
=
'utf-8'
)
as
f
:
reader
=
csv
.
reader
(
f
,
delimiter
=
"
\t
"
,
quotechar
=
quotechar
)
reader
=
csv
.
reader
(
f
,
delimiter
=
"
\t
"
,
quotechar
=
quotechar
)
lines
=
[]
lines
=
[]
for
line
in
reader
:
for
line
in
reader
:
...
@@ -413,7 +413,8 @@ def main():
...
@@ -413,7 +413,8 @@ def main():
n_gpu
=
1
n_gpu
=
1
# Initializes the distributed backend which will take care of synchronizing nodes/GPUs
# Initializes the distributed backend which will take care of synchronizing nodes/GPUs
torch
.
distributed
.
init_process_group
(
backend
=
'nccl'
)
torch
.
distributed
.
init_process_group
(
backend
=
'nccl'
)
logger
.
info
(
"device %s n_gpu %d distributed training %r"
,
device
,
n_gpu
,
bool
(
args
.
local_rank
!=
-
1
))
logger
.
info
(
"device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}"
.
format
(
device
,
n_gpu
,
bool
(
args
.
local_rank
!=
-
1
),
args
.
fp16
))
if
args
.
gradient_accumulation_steps
<
1
:
if
args
.
gradient_accumulation_steps
<
1
:
raise
ValueError
(
"Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
.
format
(
raise
ValueError
(
"Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
.
format
(
...
...
examples/run_squad.py
View file @
ae88eb88
...
@@ -108,7 +108,7 @@ class InputFeatures(object):
...
@@ -108,7 +108,7 @@ class InputFeatures(object):
def
read_squad_examples
(
input_file
,
is_training
):
def
read_squad_examples
(
input_file
,
is_training
):
"""Read a SQuAD json file into a list of SquadExample."""
"""Read a SQuAD json file into a list of SquadExample."""
with
open
(
input_file
,
"r"
)
as
reader
:
with
open
(
input_file
,
"r"
,
encoding
=
'utf-8'
)
as
reader
:
input_data
=
json
.
load
(
reader
)[
"data"
]
input_data
=
json
.
load
(
reader
)[
"data"
]
def
is_whitespace
(
c
):
def
is_whitespace
(
c
):
...
@@ -757,7 +757,7 @@ def main():
...
@@ -757,7 +757,7 @@ def main():
n_gpu
=
1
n_gpu
=
1
# Initializes the distributed backend which will take care of synchronizing nodes/GPUs
# Initializes the distributed backend which will take care of synchronizing nodes/GPUs
torch
.
distributed
.
init_process_group
(
backend
=
'nccl'
)
torch
.
distributed
.
init_process_group
(
backend
=
'nccl'
)
logger
.
info
(
"device: {} n_gpu: {}, distributed training: {}, 16-bits trainiing: {}"
.
format
(
logger
.
info
(
"device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}"
.
format
(
device
,
n_gpu
,
bool
(
args
.
local_rank
!=
-
1
),
args
.
fp16
))
device
,
n_gpu
,
bool
(
args
.
local_rank
!=
-
1
),
args
.
fp16
))
if
args
.
gradient_accumulation_steps
<
1
:
if
args
.
gradient_accumulation_steps
<
1
:
...
...
examples/run_swag.py
View file @
ae88eb88
...
@@ -100,7 +100,7 @@ class InputFeatures(object):
...
@@ -100,7 +100,7 @@ class InputFeatures(object):
def
read_swag_examples
(
input_file
,
is_training
):
def
read_swag_examples
(
input_file
,
is_training
):
with
open
(
input_file
,
'r'
)
as
f
:
with
open
(
input_file
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
reader
=
csv
.
reader
(
f
)
reader
=
csv
.
reader
(
f
)
lines
=
list
(
reader
)
lines
=
list
(
reader
)
...
@@ -333,7 +333,8 @@ def main():
...
@@ -333,7 +333,8 @@ def main():
n_gpu
=
1
n_gpu
=
1
# Initializes the distributed backend which will take care of synchronizing nodes/GPUs
# Initializes the distributed backend which will take care of synchronizing nodes/GPUs
torch
.
distributed
.
init_process_group
(
backend
=
'nccl'
)
torch
.
distributed
.
init_process_group
(
backend
=
'nccl'
)
logger
.
info
(
"device %s n_gpu %d distributed training %r"
,
device
,
n_gpu
,
bool
(
args
.
local_rank
!=
-
1
))
logger
.
info
(
"device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}"
.
format
(
device
,
n_gpu
,
bool
(
args
.
local_rank
!=
-
1
),
args
.
fp16
))
if
args
.
gradient_accumulation_steps
<
1
:
if
args
.
gradient_accumulation_steps
<
1
:
raise
ValueError
(
"Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
.
format
(
raise
ValueError
(
"Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
.
format
(
...
...
pytorch_pretrained_bert/file_utils.py
View file @
ae88eb88
...
@@ -227,7 +227,7 @@ def read_set_from_file(filename: str) -> Set[str]:
...
@@ -227,7 +227,7 @@ def read_set_from_file(filename: str) -> Set[str]:
Expected file format is one item per line.
Expected file format is one item per line.
'''
'''
collection
=
set
()
collection
=
set
()
with
open
(
filename
,
'r'
)
as
file_
:
with
open
(
filename
,
'r'
,
encoding
=
'utf-8'
)
as
file_
:
for
line
in
file_
:
for
line
in
file_
:
collection
.
add
(
line
.
rstrip
())
collection
.
add
(
line
.
rstrip
())
return
collection
return
collection
...
...
pytorch_pretrained_bert/modeling.py
View file @
ae88eb88
...
@@ -106,7 +106,7 @@ class BertConfig(object):
...
@@ -106,7 +106,7 @@ class BertConfig(object):
initializing all weight matrices.
initializing all weight matrices.
"""
"""
if
isinstance
(
vocab_size_or_config_json_file
,
str
):
if
isinstance
(
vocab_size_or_config_json_file
,
str
):
with
open
(
vocab_size_or_config_json_file
,
"r"
)
as
reader
:
with
open
(
vocab_size_or_config_json_file
,
"r"
,
encoding
=
'utf-8'
)
as
reader
:
json_config
=
json
.
loads
(
reader
.
read
())
json_config
=
json
.
loads
(
reader
.
read
())
for
key
,
value
in
json_config
.
items
():
for
key
,
value
in
json_config
.
items
():
self
.
__dict__
[
key
]
=
value
self
.
__dict__
[
key
]
=
value
...
@@ -137,7 +137,7 @@ class BertConfig(object):
...
@@ -137,7 +137,7 @@ class BertConfig(object):
@
classmethod
@
classmethod
def
from_json_file
(
cls
,
json_file
):
def
from_json_file
(
cls
,
json_file
):
"""Constructs a `BertConfig` from a json file of parameters."""
"""Constructs a `BertConfig` from a json file of parameters."""
with
open
(
json_file
,
"r"
)
as
reader
:
with
open
(
json_file
,
"r"
,
encoding
=
'utf-8'
)
as
reader
:
text
=
reader
.
read
()
text
=
reader
.
read
()
return
cls
.
from_dict
(
json
.
loads
(
text
))
return
cls
.
from_dict
(
json
.
loads
(
text
))
...
...
setup.py
View file @
ae88eb88
...
@@ -41,7 +41,7 @@ setup(
...
@@ -41,7 +41,7 @@ setup(
author
=
"Thomas Wolf, Victor Sanh, Tim Rault, Google AI Language Team Authors"
,
author
=
"Thomas Wolf, Victor Sanh, Tim Rault, Google AI Language Team Authors"
,
author_email
=
"thomas@huggingface.co"
,
author_email
=
"thomas@huggingface.co"
,
description
=
"PyTorch version of Google AI BERT model with script to load Google pre-trained models"
,
description
=
"PyTorch version of Google AI BERT model with script to load Google pre-trained models"
,
long_description
=
open
(
"README.md"
,
"r"
).
read
(),
long_description
=
open
(
"README.md"
,
"r"
,
encoding
=
'utf-8'
).
read
(),
long_description_content_type
=
"text/markdown"
,
long_description_content_type
=
"text/markdown"
,
keywords
=
'BERT NLP deep learning google'
,
keywords
=
'BERT NLP deep learning google'
,
license
=
'Apache'
,
license
=
'Apache'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment