"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "fb560dcb075497f61880010245192e7e1fdbeca4"
Commit ae88eb88 authored by thomwolf

set encoding to 'utf-8' in calls to open

parent e1eab59a
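The motivation for the change: when open() is called without an explicit encoding, Python 3 falls back to the platform's preferred locale encoding (locale.getpreferredencoding(False)), which is often cp1252 on Windows rather than UTF-8. Data and config files that read fine on a UTF-8 Linux machine can then raise UnicodeDecodeError or silently mis-decode. A minimal, self-contained sketch of the pitfall (illustrative only, not code from this repository):

import locale
import tempfile

print("platform default encoding:", locale.getpreferredencoding(False))

# Write a small UTF-8 file containing a non-ASCII character.
with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".txt",
                                 delete=False) as tmp:
    tmp.write("café\n")
    path = tmp.name

# Relying on the platform default: may raise or mis-decode on non-UTF-8 locales.
try:
    with open(path, "r") as reader:  # encoding depends on the current locale
        print("default read:", reader.readline().rstrip())
except UnicodeDecodeError as exc:
    print("default read failed:", exc)

# Explicit encoding, as this commit adds everywhere: same behaviour on every platform.
with open(path, "r", encoding="utf-8") as reader:
    print("utf-8 read:", reader.readline().rstrip())

Passing encoding='utf-8' explicitly makes each open() call in the diff below deterministic across platforms and locales.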
@@ -168,7 +168,7 @@ def read_examples(input_file):
     """Read a list of `InputExample`s from an input file."""
     examples = []
     unique_id = 0
-    with open(input_file, "r") as reader:
+    with open(input_file, "r", encoding='utf-8') as reader:
         while True:
             line = reader.readline()
             if not line:
...
@@ -91,7 +91,7 @@ class DataProcessor(object):
     @classmethod
     def _read_tsv(cls, input_file, quotechar=None):
         """Reads a tab separated value file."""
-        with open(input_file, "r") as f:
+        with open(input_file, "r", encoding='utf-8') as f:
             reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
             lines = []
             for line in reader:
@@ -413,7 +413,8 @@ def main():
         n_gpu = 1
         # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
         torch.distributed.init_process_group(backend='nccl')
-    logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
+    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
+        device, n_gpu, bool(args.local_rank != -1), args.fp16))
 
     if args.gradient_accumulation_steps < 1:
         raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
...
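Besides adding args.fp16 to the message, this logger.info call switches from %-style arguments, which the logging module interpolates lazily only when the record is actually emitted, to a string built eagerly with str.format, matching the style already used by the other main() updated later in this diff. A small sketch contrasting the two styles with made-up values (not repository code):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

device, n_gpu, distributed, fp16 = "cuda", 1, False, False

# %-style arguments: formatting is deferred until the record is handled,
# so a message filtered out by the log level costs almost nothing.
logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, distributed)

# str.format: the message string is built before logger.info() is entered;
# this is the style the updated call uses.
logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
    device, n_gpu, distributed, fp16))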
@@ -108,7 +108,7 @@ class InputFeatures(object):
 
 def read_squad_examples(input_file, is_training):
     """Read a SQuAD json file into a list of SquadExample."""
-    with open(input_file, "r") as reader:
+    with open(input_file, "r", encoding='utf-8') as reader:
         input_data = json.load(reader)["data"]
 
     def is_whitespace(c):
@@ -757,7 +757,7 @@ def main():
         n_gpu = 1
         # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
         torch.distributed.init_process_group(backend='nccl')
-    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits trainiing: {}".format(
+    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
         device, n_gpu, bool(args.local_rank != -1), args.fp16))
 
     if args.gradient_accumulation_steps < 1:
...
@@ -100,7 +100,7 @@ class InputFeatures(object):
 
 def read_swag_examples(input_file, is_training):
-    with open(input_file, 'r') as f:
+    with open(input_file, 'r', encoding='utf-8') as f:
         reader = csv.reader(f)
         lines = list(reader)
@@ -333,7 +333,8 @@ def main():
         n_gpu = 1
         # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
         torch.distributed.init_process_group(backend='nccl')
-    logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
+    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
+        device, n_gpu, bool(args.local_rank != -1), args.fp16))
 
     if args.gradient_accumulation_steps < 1:
         raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
...
@@ -227,7 +227,7 @@ def read_set_from_file(filename: str) -> Set[str]:
     Expected file format is one item per line.
     '''
     collection = set()
-    with open(filename, 'r') as file_:
+    with open(filename, 'r', encoding='utf-8') as file_:
         for line in file_:
             collection.add(line.rstrip())
     return collection
...
@@ -106,7 +106,7 @@ class BertConfig(object):
                 initializing all weight matrices.
         """
         if isinstance(vocab_size_or_config_json_file, str):
-            with open(vocab_size_or_config_json_file, "r") as reader:
+            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
                 json_config = json.loads(reader.read())
             for key, value in json_config.items():
                 self.__dict__[key] = value
@@ -137,7 +137,7 @@ class BertConfig(object):
     @classmethod
     def from_json_file(cls, json_file):
         """Constructs a `BertConfig` from a json file of parameters."""
-        with open(json_file, "r") as reader:
+        with open(json_file, "r", encoding='utf-8') as reader:
             text = reader.read()
         return cls.from_dict(json.loads(text))
...
@@ -41,7 +41,7 @@ setup(
     author="Thomas Wolf, Victor Sanh, Tim Rault, Google AI Language Team Authors",
     author_email="thomas@huggingface.co",
     description="PyTorch version of Google AI BERT model with script to load Google pre-trained models",
-    long_description=open("README.md", "r").read(),
+    long_description=open("README.md", "r", encoding='utf-8').read(),
     long_description_content_type="text/markdown",
     keywords='BERT NLP deep learning google',
     license='Apache',
...