"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "21c88a07b72801e79c4a70844ee096fc85ef1ca4"
Unverified commit 531336bb, authored by Sylvain Gugger, committed by GitHub

Fix deprecation warnings for int div (#15180)

* Fix deprecation warnings for int div

Co-authored-by: mgoldey <matthew.goldey@gmail.com>

* Fix import

* Ensure that tensor output is a Python scalar

* Make backward compatible

* Make code more readable

* Adapt test functions

Co-authored-by: mgoldey <matthew.goldey@gmail.com>
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
parent f6d3fee8
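
Background on the warning this commit silences, as a minimal sketch (the snippet is illustrative; the 1.8.0 threshold matches the version check in the new helper below): from PyTorch 1.8 on, floor division of tensors via the // operator (torch.floor_divide) emits a deprecation warning, and torch.div(..., rounding_mode="floor") is the recommended replacement. Earlier releases do not accept the rounding_mode keyword, hence the version guard.

import torch

a, b = torch.tensor(7), torch.tensor(2)

# Deprecated on torch >= 1.8: warns that __floordiv__'s rounding behavior
# will change in a future release.
old_style = a // b

# Warning-free equivalent on torch >= 1.8 (rounding_mode does not exist
# before 1.8, hence the version check in torch_int_div below):
new_style = torch.div(a, b, rounding_mode="floor")

assert int(old_style) == int(new_style) == 3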
@@ -302,6 +302,8 @@ class DataCollatorForWav2Vec2Pretraining:
         batch_size = batch["input_values"].shape[0]
 
         mask_indices_seq_length = self.model._get_feat_extract_output_lengths(batch["input_values"].shape[-1])
+        # make sure masked sequence length is a Python scalar
+        mask_indices_seq_length = int(mask_indices_seq_length)
 
         # make sure that no loss is computed on padded inputs
         if batch.get("attention_mask") is not None:
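
Why the added int(...) matters, shown with a minimal illustration (the numbers are arbitrary): after the switch to torch.div, _get_feat_extract_output_lengths can hand back a zero-dimensional tensor, while the shape tuples and numpy-based mask computation further down expect a plain Python integer.

import torch

# torch.div returns a 0-dim tensor even for scalar inputs
length = torch.div(torch.tensor(16000) - 10, 5, rounding_mode="floor") + 1
print(type(length))  # <class 'torch.Tensor'>

length = int(length)  # convert to a Python scalar, as the collator now does
print(type(length), length)  # <class 'int'> 3199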
@@ -23,6 +23,7 @@ from functools import partial
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
 
 import torch
+from packaging import version
 from torch import Tensor, device, nn
 from torch.nn import CrossEntropyLoss
@@ -2362,3 +2363,13 @@ def apply_chunking_to_forward(
         return torch.cat(output_chunks, dim=chunk_dim)
 
     return forward_fn(*input_tensors)
+
+
+def torch_int_div(tensor1, tensor2):
+    """
+    A function that performs integer division across different versions of PyTorch.
+    """
+    if version.parse(torch.__version__) < version.parse("1.8.0"):
+        return tensor1 // tensor2
+    else:
+        return torch.div(tensor1, tensor2, rounding_mode="floor")
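
A quick usage sketch of the new helper (the import path reflects where this diff places it; the sample tensors are arbitrary):

import torch

from transformers.modeling_utils import torch_int_div

lengths = torch.tensor([400, 800, 1600])
# element-wise floor division with no deprecation warning on torch >= 1.8
print(torch_int_div(lengths - 10, 5) + 1)  # tensor([ 79, 159, 319])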
@@ -33,7 +33,7 @@ from ...file_utils import (
     replace_return_docstrings,
 )
 from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_hubert import HubertConfig
@@ -829,7 +829,7 @@ class HubertPreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
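
The _conv_out_length rewrite here (and in the identical hunks below) is behavior-preserving; only the division operator changes. A worked example of the formula (the kernel/stride values are the stock Wav2Vec2-style feature-encoder config; the one-second 16 kHz input is arbitrary):

def conv_out_length(input_length, kernel_size, stride):
    # same output-length formula as _conv_out_length above
    return (input_length - kernel_size) // stride + 1

length = 16000  # one second of 16 kHz audio
for kernel_size, stride in zip((10, 3, 3, 3, 3, 2, 2), (5, 2, 2, 2, 2, 2, 2)):
    length = conv_out_length(length, kernel_size, stride)
print(length)  # 49 feature frames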
@@ -29,7 +29,7 @@ from transformers.deepspeed import is_deepspeed_zero3_enabled
 from ...activations import ACT2FN
 from ...file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_sew import SEWConfig
@@ -735,7 +735,7 @@ class SEWPreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
@@ -30,7 +30,7 @@ from transformers.deepspeed import is_deepspeed_zero3_enabled
 from ...activations import ACT2FN
 from ...file_utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward
 from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_sew_d import SEWDConfig
@@ -1266,7 +1266,7 @@ class SEWDPreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
@@ -35,7 +35,7 @@ from ...file_utils import (
     replace_return_docstrings,
 )
 from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_unispeech import UniSpeechConfig
@@ -969,7 +969,7 @@ class UniSpeechPreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
@@ -35,7 +35,7 @@ from ...file_utils import (
     replace_return_docstrings,
 )
 from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput, TokenClassifierOutput
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_unispeech_sat import UniSpeechSatConfig
@@ -1003,7 +1003,7 @@ class UniSpeechSatPreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
@@ -41,7 +41,7 @@ from ...modeling_outputs import (
     SequenceClassifierOutput,
     TokenClassifierOutput,
 )
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_wav2vec2 import Wav2Vec2Config
@@ -1104,7 +1104,7 @@ class Wav2Vec2PreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
@@ -35,7 +35,7 @@ from ...file_utils import (
     add_start_docstrings_to_model_forward,
 )
 from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput, TokenClassifierOutput
-from ...modeling_utils import PreTrainedModel
+from ...modeling_utils import PreTrainedModel, torch_int_div
 from ...utils import logging
 from .configuration_wavlm import WavLMConfig
@@ -1057,7 +1057,7 @@ class WavLMPreTrainedModel(PreTrainedModel):
         def _conv_out_length(input_length, kernel_size, stride):
             # 1D convolutional layer output length formula taken
             # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-            return (input_length - kernel_size) // stride + 1
+            return torch_int_div(input_length - kernel_size, stride) + 1
 
         for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
             input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
@@ -794,10 +794,10 @@ class Wav2Vec2RobustModelTest(ModelTesterMixin, unittest.TestCase):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
 
         model = Wav2Vec2ForPreTraining(config).to(torch_device)
-        features_shape = (
-            inputs_dict["input_values"].shape[0],
-            model._get_feat_extract_output_lengths(inputs_dict["input_values"].shape[1]),
-        )
+        batch_size = inputs_dict["input_values"].shape[0]
+        feature_seq_length = int(model._get_feat_extract_output_lengths(inputs_dict["input_values"].shape[1]))
+        features_shape = (batch_size, feature_seq_length)
 
         mask_time_indices = _compute_mask_indices(
             features_shape,
@@ -1158,10 +1158,10 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         inputs_dict = feature_extractor(input_speech, return_tensors="pt", padding=True)
 
-        features_shape = (
-            inputs_dict["input_values"].shape[0],
-            model._get_feat_extract_output_lengths(torch.tensor(inputs_dict["input_values"].shape[1])),
-        )
+        batch_size = inputs_dict["input_values"].shape[0]
+        feature_seq_length = int(model._get_feat_extract_output_lengths(inputs_dict["input_values"].shape[1]))
+        features_shape = (batch_size, feature_seq_length)
 
         np.random.seed(4)
         mask_time_indices = _compute_mask_indices(
@@ -1208,10 +1208,10 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         inputs_dict = feature_extractor(input_speech, return_tensors="pt", padding=True)
 
-        features_shape = (
-            inputs_dict["input_values"].shape[0],
-            model._get_feat_extract_output_lengths(torch.tensor(inputs_dict["input_values"].shape[1])),
-        )
+        batch_size = inputs_dict["input_values"].shape[0]
+        feature_seq_length = int(model._get_feat_extract_output_lengths(inputs_dict["input_values"].shape[1]))
+        features_shape = (batch_size, feature_seq_length)
 
         torch.manual_seed(0)
         mask_time_indices = _compute_mask_indices(
@@ -1279,10 +1279,10 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         inputs_dict = feature_extractor(input_speech, return_tensors="pt", padding=True)
 
-        features_shape = (
-            inputs_dict["input_values"].shape[0],
-            model._get_feat_extract_output_lengths(inputs_dict["input_values"].shape[1]),
-        )
+        batch_size = inputs_dict["input_values"].shape[0]
+        feature_seq_length = int(model._get_feat_extract_output_lengths(inputs_dict["input_values"].shape[1]))
+        features_shape = (batch_size, feature_seq_length)
 
         torch.manual_seed(0)
         np.random.seed(0)
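
The test rewrites mirror the collator change: features_shape becomes a tuple of plain Python ints before reaching _compute_mask_indices, and the now-unnecessary torch.tensor(...) wrapping of the input length is dropped. A minimal sketch of the resulting pattern (the batch shape and mask parameters are illustrative, not taken from the tests):

import torch

from transformers import Wav2Vec2Config
from transformers.models.wav2vec2.modeling_wav2vec2 import (
    Wav2Vec2ForPreTraining,
    _compute_mask_indices,
)

model = Wav2Vec2ForPreTraining(Wav2Vec2Config())
input_values = torch.randn(2, 16000)

batch_size = input_values.shape[0]
feature_seq_length = int(model._get_feat_extract_output_lengths(input_values.shape[1]))
features_shape = (batch_size, feature_seq_length)  # plain ints, e.g. (2, 49)

mask_time_indices = _compute_mask_indices(features_shape, mask_prob=0.2, mask_length=2)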