Unverified Commit 32dbb2d9 authored by Patrick von Platen, committed by GitHub

make style (#11442)

parent 04ab2ca6
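
Every hunk below makes the same mechanical change: leading and trailing spaces inside one-line docstrings are removed. This is consistent with re-running the repository's `make style` target after a formatter upgrade (newer versions of Black trim docstring whitespace), though the exact tool version is not recorded here. A minimal before/after illustration:

```python
# Before the style pass: stray spaces inside the triple quotes.
def train_old():
    """ Train the model """

# After: only the docstring string contents change; behavior does not.
def train_new():
    """Train the model"""

assert train_old.__doc__ == " Train the model "
assert train_new.__doc__ == "Train the model"
```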
@@ -74,7 +74,7 @@ def to_list(tensor):


 def train(args, train_dataset, model, tokenizer):
-    """ Train the model """
+    """Train the model"""
     if args.local_rank in [-1, 0]:
         tb_writer = SummaryWriter()

...
@@ -61,7 +61,7 @@ def accuracy(out, labels):


 def load_rocstories_dataset(dataset_path):
-    """ Output a list of tuples(story, 1st continuation, 2nd continuation, label) """
+    """Output a list of tuples(story, 1st continuation, 2nd continuation, label)"""
     with open(dataset_path, encoding="utf_8") as f:
         f = csv.reader(f)
         output = []
@@ -184,7 +184,7 @@ def main():

     # Load and encode the datasets
     def tokenize_and_encode(obj):
-        """ Tokenize and encode a nested object """
+        """Tokenize and encode a nested object"""
         if isinstance(obj, str):
             return tokenizer.convert_tokens_to_ids(tokenizer.tokenize(obj))
         elif isinstance(obj, int):
...
@@ -276,7 +276,7 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal


 def train(args, train_dataset, model, tokenizer):
-    """ Train the model """
+    """Train the model"""
     if args.local_rank in [-1, 0]:
         tb_writer = SummaryWriter()

...
@@ -19,7 +19,7 @@ import fire


 def minify(src_dir: str, dest_dir: str, n: int):
-    """Write first n lines of each file f in src_dir to dest_dir/f """
+    """Write first n lines of each file f in src_dir to dest_dir/f"""
     src_dir = Path(src_dir)
     dest_dir = Path(dest_dir)
     dest_dir.mkdir(exist_ok=True)
...
@@ -71,7 +71,7 @@ def set_seed(args):


 def train(args, train_dataset, model, tokenizer):
-    """ Train the model """
+    """Train the model"""
     if args.local_rank in [-1, 0]:
         tb_writer = SummaryWriter()

...
@@ -251,7 +251,7 @@ class TransformerDecoder(nn.Module):
         return output, state  # , state

     def init_decoder_state(self, src, memory_bank, with_cache=False):
-        """ Init decoder state """
+        """Init decoder state"""
         state = TransformerDecoderState(src)
         if with_cache:
             state._init_cache(memory_bank, self.num_layers)
@@ -479,11 +479,11 @@ class MultiHeadedAttention(nn.Module):
         head_count = self.head_count

         def shape(x):
-            """ projection """
+            """projection"""
             return x.view(batch_size, -1, head_count, dim_per_head).transpose(1, 2)

         def unshape(x):
-            """ compute context """
+            """compute context"""
             return x.transpose(1, 2).contiguous().view(batch_size, -1, head_count * dim_per_head)

         # 1) Project key, value, and query.
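
For context on the two helpers touched in the hunk above: `shape` splits the model dimension into attention heads, and `unshape` inverts that split. A small self-contained check of the reshaping, with arbitrary example sizes:

```python
import torch

batch_size, seq_len, head_count, dim_per_head = 2, 5, 8, 64
x = torch.randn(batch_size, seq_len, head_count * dim_per_head)

# shape(): (batch, seq, heads * dim) -> (batch, heads, seq, dim)
shaped = x.view(batch_size, -1, head_count, dim_per_head).transpose(1, 2)
assert shaped.shape == (batch_size, head_count, seq_len, dim_per_head)

# unshape(): the exact inverse, back to (batch, seq, heads * dim)
unshaped = shaped.transpose(1, 2).contiguous().view(batch_size, -1, head_count * dim_per_head)
assert torch.equal(unshaped, x)
```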
@@ -571,12 +571,12 @@ class DecoderState(object):
     """

     def detach(self):
-        """ Need to document this """
+        """Need to document this"""
         self.hidden = tuple([_.detach() for _ in self.hidden])
         self.input_feed = self.input_feed.detach()

     def beam_update(self, idx, positions, beam_size):
-        """ Need to document this """
+        """Need to document this"""
         for e in self._all:
             sizes = e.size()
             br = sizes[1]
@@ -592,7 +592,7 @@ class DecoderState(object):


 class TransformerDecoderState(DecoderState):
-    """ Transformer Decoder state base class """
+    """Transformer Decoder state base class"""

     def __init__(self, src):
         """
@@ -638,7 +638,7 @@ class TransformerDecoderState(DecoderState):
             self.cache["layer_{}".format(l)] = layer_cache

     def repeat_beam_size_times(self, beam_size):
-        """ Repeat beam_size times along batch dimension. """
+        """Repeat beam_size times along batch dimension."""
         self.src = self.src.data.repeat(1, beam_size, 1)

     def map_batch_fn(self, fn):
...
@@ -25,19 +25,19 @@ class SummarizationDataProcessingTest(unittest.TestCase):
         self.block_size = 10

     def test_fit_to_block_sequence_too_small(self):
-        """ Pad the sequence with 0 if the sequence is smaller than the block size."""
+        """Pad the sequence with 0 if the sequence is smaller than the block size."""
         sequence = [1, 2, 3, 4]
         expected_output = [1, 2, 3, 4, 0, 0, 0, 0, 0, 0]
         self.assertEqual(truncate_or_pad(sequence, self.block_size, 0), expected_output)

     def test_fit_to_block_sequence_fit_exactly(self):
-        """ Do nothing if the sequence is the right size. """
+        """Do nothing if the sequence is the right size."""
         sequence = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
         expected_output = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
         self.assertEqual(truncate_or_pad(sequence, self.block_size, 0), expected_output)

     def test_fit_to_block_sequence_too_big(self):
-        """ Truncate the sequence if it is too long. """
+        """Truncate the sequence if it is too long."""
         sequence = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
         expected_output = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
         self.assertEqual(truncate_or_pad(sequence, self.block_size, 0), expected_output)
...
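
The three tests above pin down the contract of `truncate_or_pad`; a minimal sketch consistent with them (not necessarily the repository's exact implementation):

```python
def truncate_or_pad(sequence, block_size, pad_token_id):
    # Too long: keep only the first block_size tokens.
    if len(sequence) > block_size:
        return sequence[:block_size]
    # Exact fit or too short: right-pad with pad_token_id up to block_size.
    return sequence + [pad_token_id] * (block_size - len(sequence))
```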
@@ -47,7 +47,7 @@ class CNNDMDataset(Dataset):
             self.documents.append(path_to_story)

     def __len__(self):
-        """ Returns the number of documents. """
+        """Returns the number of documents."""
         return len(self.documents)

     def __getitem__(self, idx):
...
@@ -49,14 +49,14 @@ logger = logging.getLogger(__name__)


 def entropy(p):
-    """ Compute the entropy of a probability distribution """
+    """Compute the entropy of a probability distribution"""
     plogp = p * torch.log(p)
     plogp[p == 0] = 0
     return -plogp.sum(dim=-1)


 def print_2d_tensor(tensor):
-    """ Print a 2D tensor """
+    """Print a 2D tensor"""
     logger.info("lv, h >\t" + "\t".join(f"{x + 1}" for x in range(len(tensor))))
     for row in range(len(tensor)):
         if tensor.dtype != torch.long:
...
@@ -36,7 +36,7 @@ def save_model(model, dirpath):


 def entropy(p, unlogit=False):
-    """ Compute the entropy of a probability distribution """
+    """Compute the entropy of a probability distribution"""
     exponent = 2
     if unlogit:
         p = torch.pow(p, exponent)
@@ -46,7 +46,7 @@ def entropy(p, unlogit=False):


 def print_2d_tensor(tensor):
-    """ Print a 2D tensor """
+    """Print a 2D tensor"""
     logger.info("lv, h >\t" + "\t".join(f"{x + 1}" for x in range(len(tensor))))
     for row in range(len(tensor)):
         if tensor.dtype != torch.long:
...
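
Both files above carry the same base `entropy` helper; a quick worked example of its behavior, using the body from the first hunk with the zero-probability convention made explicit:

```python
import torch

def entropy(p):
    """Entropy of a probability distribution along the last dim, in nats."""
    plogp = p * torch.log(p)
    plogp[p == 0] = 0  # convention: 0 * log(0) = 0 (avoids NaN from 0 * -inf)
    return -plogp.sum(dim=-1)

print(entropy(torch.tensor([0.25, 0.25, 0.25, 0.25])))  # log(4) ~ 1.3863, maximal for 4 outcomes
print(entropy(torch.tensor([1.0, 0.0, 0.0, 0.0])))      # 0.0, a deterministic distribution
```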
@@ -70,7 +70,7 @@ def get_wanted_result(result):


 def train(args, train_dataset, model, tokenizer, train_highway=False):
-    """ Train the model """
+    """Train the model"""
     if args.local_rank in [-1, 0]:
         tb_writer = SummaryWriter()

...
@@ -92,7 +92,7 @@ def to_list(tensor):


 def train(args, train_dataset, model, tokenizer, teacher=None):
-    """ Train the model """
+    """Train the model"""
     if args.local_rank in [-1, 0]:
         tb_writer = SummaryWriter()

...
@@ -64,7 +64,7 @@ def set_seed(args):


 def train(args, train_dataset, model, tokenizer, criterion):
-    """ Train the model """
+    """Train the model"""
     if args.local_rank in [-1, 0]:
         tb_writer = SummaryWriter()

...
@@ -393,7 +393,7 @@ class MaskedBertPreTrainedModel(PreTrainedModel):
     base_model_prefix = "bert"

     def _init_weights(self, module):
-        """ Initialize the weights """
+        """Initialize the weights"""
         if isinstance(module, (nn.Linear, nn.Embedding)):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617
...
@@ -105,7 +105,7 @@ def regularization(model: nn.Module, mode: str):


 def train(args, train_dataset, model, tokenizer, teacher=None):
-    """ Train the model """
+    """Train the model"""
     if args.local_rank in [-1, 0]:
         tb_writer = SummaryWriter(log_dir=args.output_dir)

...
@@ -113,7 +113,7 @@ def to_list(tensor):


 def train(args, train_dataset, model, tokenizer, teacher=None):
-    """ Train the model """
+    """Train the model"""
     if args.local_rank in [-1, 0]:
         tb_writer = SummaryWriter(log_dir=args.output_dir)

...
@@ -96,7 +96,7 @@ def write_msg(msg: Dict):


 def read_msg() -> Optional[Dict]:
-    """Read Line delimited JSON from stdin. """
+    """Read Line delimited JSON from stdin."""
     msg = json.loads(sys.stdin.readline().strip())
     if "terminate" in (msg.get("type"), msg.get("event")):
...
@@ -124,7 +124,7 @@ class DataProcessor:


 class SingleSentenceClassificationProcessor(DataProcessor):
-    """ Generic processor for a single sentence classification data set."""
+    """Generic processor for a single sentence classification data set."""

     def __init__(self, labels=None, examples=None, mode="classification", verbose=False):
         self.labels = [] if labels is None else labels
...
@@ -1462,7 +1462,7 @@ def tf_required(func):


 def is_tensor(x):
-    """ Tests if ``x`` is a :obj:`torch.Tensor`, :obj:`tf.Tensor` or :obj:`np.ndarray`. """
+    """Tests if ``x`` is a :obj:`torch.Tensor`, :obj:`tf.Tensor` or :obj:`np.ndarray`."""
     if is_torch_available():
         import torch

@@ -1684,7 +1684,7 @@ class _BaseLazyModule(ModuleType):


 def copy_func(f):
-    """ Returns a copy of a function f."""
+    """Returns a copy of a function f."""
     # Based on http://stackoverflow.com/a/6528148/190597 (Glenn Maynard)
     g = types.FunctionType(f.__code__, f.__globals__, name=f.__name__, argdefs=f.__defaults__, closure=f.__closure__)
     g = functools.update_wrapper(g, f)
...
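
`copy_func` above builds a fresh function object around the same code object, so attributes such as `__doc__` can be patched on the copy without touching the original. A usage sketch under that reading (the diff shows only part of the function body):

```python
import functools
import types

def copy_func(f):
    """Return a copy of function f (shared code object, independent attributes)."""
    g = types.FunctionType(f.__code__, f.__globals__, name=f.__name__, argdefs=f.__defaults__, closure=f.__closure__)
    return functools.update_wrapper(g, f)

def greet(name="world"):
    """Original docstring."""
    return f"hello {name}"

greet_copy = copy_func(greet)
greet_copy.__doc__ = "Patched docstring on the copy only."

assert greet_copy("there") == "hello there"
assert greet.__doc__ == "Original docstring."
```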
@@ -215,6 +215,6 @@ class ModelCard:
         return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"

     def to_json_file(self, json_file_path):
-        """ Save this instance to a json file."""
+        """Save this instance to a json file."""
         with open(json_file_path, "w", encoding="utf-8") as writer:
             writer.write(self.to_json_string())