"...deployment/git@developer.sourcefind.cn:OpenDAS/nni.git" did not exist on "0f0c6288c35a456707057b0aab7754fcb5ac4ffd"
Commit 6b4c3ee2 authored by Julien Chaumond

[run_lm_finetuning] GPT2 tokenizer doesn't have a pad_token

ping @lysandrejik
parent 79815bf6
@@ -202,6 +202,7 @@ def mask_tokens(inputs: torch.Tensor, tokenizer: PreTrainedTokenizer, args) -> T
         tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True) for val in labels.tolist()
     ]
     probability_matrix.masked_fill_(torch.tensor(special_tokens_mask, dtype=torch.bool), value=0.0)
-    padding_mask = labels.eq(tokenizer.pad_token_id)
-    probability_matrix.masked_fill_(padding_mask, value=0.0)
+    if tokenizer._pad_token is not None:
+        padding_mask = labels.eq(tokenizer.pad_token_id)
+        probability_matrix.masked_fill_(padding_mask, value=0.0)
     masked_indices = torch.bernoulli(probability_matrix).bool()
@@ -228,6 +229,8 @@ def train(args, train_dataset, model: PreTrainedModel, tokenizer: PreTrainedToke
     args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
 
     def collate(examples: List[torch.Tensor]):
+        if tokenizer._pad_token is None:
+            return pad_sequence(examples, batch_first=True)
         return pad_sequence(examples, batch_first=True, padding_value=tokenizer.pad_token_id)
 
     train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset)
@@ -421,6 +424,8 @@ def evaluate(args, model: PreTrainedModel, tokenizer: PreTrainedTokenizer, prefi
     # Note that DistributedSampler samples randomly
 
     def collate(examples: List[torch.Tensor]):
+        if tokenizer._pad_token is None:
+            return pad_sequence(examples, batch_first=True)
         return pad_sequence(examples, batch_first=True, padding_value=tokenizer.pad_token_id)
 
     eval_sampler = SequentialSampler(eval_dataset)
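For reference, a minimal, hypothetical sketch (not part of the commit) of the failure the guarded `collate` avoids: GPT-2's tokenizer ships without a pad token, so `tokenizer.pad_token_id` comes back as `None` and `pad_sequence` cannot use it as a `padding_value`. The checkpoint name and toy tensors below are assumptions for illustration only.

```python
# Hypothetical repro sketch, not part of the commit. Assumes the `transformers`
# GPT2Tokenizer and torch.nn.utils.rnn.pad_sequence; the example tensors are made up.
import torch
from torch.nn.utils.rnn import pad_sequence
from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
print(tokenizer._pad_token)  # None: GPT-2 defines no padding token

examples = [torch.tensor([1, 2, 3]), torch.tensor([4, 5])]

# Unguarded version: with no pad token set, tokenizer.pad_token_id is None,
# which pad_sequence rejects as a padding_value.
# pad_sequence(examples, batch_first=True, padding_value=tokenizer.pad_token_id)

# Guarded collate, mirroring the patch: fall back to pad_sequence's default
# fill value when the tokenizer has no pad token.
def collate(examples):
    if tokenizer._pad_token is None:
        return pad_sequence(examples, batch_first=True)
    return pad_sequence(examples, batch_first=True, padding_value=tokenizer.pad_token_id)

print(collate(examples))
# tensor([[1, 2, 3],
#         [4, 5, 0]])
```

When no pad token is set, the fallback simply reuses `pad_sequence`'s default fill value of 0, which is what the patched `collate` does in both `train` and `evaluate`.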