"tests/models/gpt2/test_modeling_gpt2.py" did not exist on "62f5ae68ec2ebee34f2a0bbbf195343165e4ce3a"
Unverified Commit 600496fa authored by Patrick von Platen, committed by GitHub

[Wav2Vec2] Rename model's feature extractor to feature encoder (#14959)

* rename classes

* clean up more namings

* remove bogus file

* Apply suggestions from code review

* Apply suggestions from code review

* replace more names

* more regex replace

* make style

* correct

* correct more

* make style

* finish

* correct more in wav2vec2

* make style

* improve freeze_extractor

* add aliases

* add tf aliases
parent 1bfa3477
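The rename targets the internal `nn.Module` that convolves raw waveforms into latent features; the user-facing `Wav2Vec2FeatureExtractor` preprocessing class (a `SequenceFeatureExtractor`) keeps its name. A minimal sketch of the resulting public API change, with an illustrative checkpoint name:

```python
from transformers import Wav2Vec2ForCTC

model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")

model.freeze_feature_extractor()  # old name: still works, now emits a FutureWarning
model.freeze_feature_encoder()    # new, equivalent name introduced by this commit
```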
@@ -431,7 +431,7 @@ class Wav2Vec2SamePadLayer(nn.Module):
         return hidden_states


-class Wav2Vec2FeatureExtractor(nn.Module):
+class Wav2Vec2FeatureEncoder(nn.Module):
     """Construct the features from raw audio waveform"""

     def __init__(self, config):
@@ -484,6 +484,17 @@ class Wav2Vec2FeatureExtractor(nn.Module):
         return hidden_states


+class Wav2Vec2FeatureExtractor(Wav2Vec2FeatureEncoder):
+    def __init__(self, config):
+        super().__init__(config)
+        warnings.warn(
+            f"The class `{self.__class__.__name__}` has been deprecated "
+            "and will be removed in Transformers v5. "
+            f"Use `{self.__class__.__bases__[0].__name__}` instead.",
+            FutureWarning,
+        )
+
+
 class Wav2Vec2FeatureProjection(nn.Module):
     def __init__(self, config):
         super().__init__()
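The backward-compatibility alias above subclasses the new encoder and only adds a warning, so existing imports keep working. A sketch of what triggering it looks like (import path as in the transformers source tree):

```python
import warnings

from transformers import Wav2Vec2Config
from transformers.models.wav2vec2.modeling_wav2vec2 import Wav2Vec2FeatureExtractor

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    module = Wav2Vec2FeatureExtractor(Wav2Vec2Config())  # deprecated alias

# the alias behaves exactly like Wav2Vec2FeatureEncoder, plus one FutureWarning
assert any(issubclass(w.category, FutureWarning) for w in caught)
```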
@@ -1125,7 +1136,7 @@ class Wav2Vec2PreTrainedModel(PreTrainedModel):
         return attention_mask

     def _set_gradient_checkpointing(self, module, value=False):
-        if isinstance(module, (Wav2Vec2Encoder, Wav2Vec2EncoderStableLayerNorm, Wav2Vec2FeatureExtractor)):
+        if isinstance(module, (Wav2Vec2Encoder, Wav2Vec2EncoderStableLayerNorm, Wav2Vec2FeatureEncoder)):
             module.gradient_checkpointing = value
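The `isinstance` tuple has to name the new class; otherwise `gradient_checkpointing_enable()`, which applies `_set_gradient_checkpointing` over all submodules, would silently skip the convolutional feature encoder. Usage is unchanged (illustrative checkpoint name):

```python
from transformers import Wav2Vec2Model

model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base")
model.gradient_checkpointing_enable()  # now reaches Wav2Vec2FeatureEncoder again
```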
@@ -1194,7 +1205,7 @@ class Wav2Vec2Model(Wav2Vec2PreTrainedModel):
     def __init__(self, config: Wav2Vec2Config):
         super().__init__(config)
         self.config = config
-        self.feature_extractor = Wav2Vec2FeatureExtractor(config)
+        self.feature_extractor = Wav2Vec2FeatureEncoder(config)
         self.feature_projection = Wav2Vec2FeatureProjection(config)

         # model only needs masking vector if mask prob is > 0.0
@@ -1213,8 +1224,20 @@ class Wav2Vec2Model(Wav2Vec2PreTrainedModel):
     def freeze_feature_extractor(self):
         """
-        Calling this function will disable the gradient computation for the feature extractor so that its parameters
-        will not be updated during training.
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
         """
+        warnings.warn(
+            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
+            "Please use the equivalent `freeze_feature_encoder` method instead.",
+            FutureWarning,
+        )
+        self.freeze_feature_encoder()
+
+    def freeze_feature_encoder(self):
+        """
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
+        """
         self.feature_extractor._freeze_parameters()
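Both methods funnel into `_freeze_parameters`, which sets `requires_grad=False` on every parameter of the conv stack; note that the *attribute* is still called `feature_extractor`, only its class was renamed. A quick sanity check, with an illustrative checkpoint name:

```python
from transformers import Wav2Vec2Model

model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base")
model.freeze_feature_encoder()

# the attribute keeps its old name; its parameters no longer require gradients
assert not any(p.requires_grad for p in model.feature_extractor.parameters())
```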
@@ -1349,8 +1372,20 @@ class Wav2Vec2ForPreTraining(Wav2Vec2PreTrainedModel):
     def freeze_feature_extractor(self):
         """
-        Calling this function will disable the gradient computation for the feature extractor so that its parameters
-        will not be updated during training.
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
         """
+        warnings.warn(
+            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
+            "Please use the equivalent `freeze_feature_encoder` method instead.",
+            FutureWarning,
+        )
+        self.freeze_feature_encoder()
+
+    def freeze_feature_encoder(self):
+        """
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
+        """
         self.wav2vec2.feature_extractor._freeze_parameters()
@@ -1637,8 +1672,20 @@ class Wav2Vec2ForCTC(Wav2Vec2PreTrainedModel):
     def freeze_feature_extractor(self):
         """
-        Calling this function will disable the gradient computation for the feature extractor so that its parameter
-        will not be updated during training.
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
         """
+        warnings.warn(
+            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
+            "Please use the equivalent `freeze_feature_encoder` method instead.",
+            FutureWarning,
+        )
+        self.freeze_feature_encoder()
+
+    def freeze_feature_encoder(self):
+        """
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
+        """
         self.wav2vec2.feature_extractor._freeze_parameters()
@@ -1745,8 +1792,20 @@ class Wav2Vec2ForSequenceClassification(Wav2Vec2PreTrainedModel):
     def freeze_feature_extractor(self):
         """
-        Calling this function will disable the gradient computation for the feature extractor so that its parameters
-        will not be updated during training.
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
         """
+        warnings.warn(
+            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
+            "Please use the equivalent `freeze_feature_encoder` method instead.",
+            FutureWarning,
+        )
+        self.freeze_feature_encoder()
+
+    def freeze_feature_encoder(self):
+        """
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
+        """
         self.wav2vec2.feature_extractor._freeze_parameters()
@@ -1848,8 +1907,20 @@ class Wav2Vec2ForAudioFrameClassification(Wav2Vec2PreTrainedModel):
     def freeze_feature_extractor(self):
         """
-        Calling this function will disable the gradient computation for the feature extractor so that its parameters
-        will not be updated during training.
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
         """
+        warnings.warn(
+            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
+            "Please use the equivalent `freeze_feature_encoder` method instead.",
+            FutureWarning,
+        )
+        self.freeze_feature_encoder()
+
+    def freeze_feature_encoder(self):
+        """
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
+        """
         self.wav2vec2.feature_extractor._freeze_parameters()
@@ -1994,8 +2065,20 @@ class Wav2Vec2ForXVector(Wav2Vec2PreTrainedModel):
     def freeze_feature_extractor(self):
         """
-        Calling this function will disable the gradient computation for the feature extractor so that its parameters
-        will not be updated during training.
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
         """
+        warnings.warn(
+            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
+            "Please use the equivalent `freeze_feature_encoder` method instead.",
+            FutureWarning,
+        )
+        self.freeze_feature_encoder()
+
+    def freeze_feature_encoder(self):
+        """
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
+        """
         self.wav2vec2.feature_extractor._freeze_parameters()
@@ -64,24 +64,24 @@ class WavLMConfig(PretrainedConfig):
         layer_norm_eps (`float`, *optional*, defaults to 1e-12):
             The epsilon used by the layer normalization layers.
         feat_extract_norm (`str`, *optional*, defaults to `"group"`):
-            The norm to be applied to 1D convolutional layers in feature extractor. One of `"group"` for group
+            The norm to be applied to 1D convolutional layers in the feature encoder. One of `"group"` for group
             normalization of only the first 1D convolutional layer or `"layer"` for layer normalization of all 1D
             convolutional layers.
         feat_proj_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout probability for output of the feature extractor.
+            The dropout probability for the output of the feature encoder.
         feat_extract_activation (`str`, *optional*, defaults to `"gelu"`):
             The non-linear activation function (function or string) in the 1D convolutional layers of the feature
             extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
         feat_quantizer_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout probabilitiy for quantized feature extractor states.
+            The dropout probability for quantized feature encoder states.
         conv_dim (`Tuple[int]`, *optional*, defaults to `(512, 512, 512, 512, 512, 512, 512)`):
             A tuple of integers defining the number of input and output channels of each 1D convolutional layer in the
-            feature extractor. The length of *conv_dim* defines the number of 1D convolutional layers.
+            feature encoder. The length of *conv_dim* defines the number of 1D convolutional layers.
         conv_stride (`Tuple[int]`, *optional*, defaults to `(5, 2, 2, 2, 2, 2, 2)`):
-            A tuple of integers defining the stride of each 1D convolutional layer in the feature extractor. The length
+            A tuple of integers defining the stride of each 1D convolutional layer in the feature encoder. The length
             of *conv_stride* defines the number of convolutional layers and has to match the length of *conv_dim*.
         conv_kernel (`Tuple[int]`, *optional*, defaults to `(10, 3, 3, 3, 3, 3, 3)`):
-            A tuple of integers defining the kernel size of each 1D convolutional layer in the feature extractor. The
+            A tuple of integers defining the kernel size of each 1D convolutional layer in the feature encoder. The
             length of *conv_kernel* defines the number of convolutional layers and has to match the length of
             *conv_dim*.
         conv_bias (`bool`, *optional*, defaults to `False`):
@@ -96,7 +96,7 @@ class WavLMConfig(PretrainedConfig):
             True` corresponds to applying layer norm before the attention layer, whereas `do_stable_layer_norm is
             False` corresponds to applying layer norm after the attention layer.
         apply_spec_augment (`bool`, *optional*, defaults to `True`):
-            Whether to apply *SpecAugment* data augmentation to the outputs of the feature extractor. For reference see
+            Whether to apply *SpecAugment* data augmentation to the outputs of the feature encoder. For reference see
             [SpecAugment: A Simple Data Augmentation Method for Automatic Speech
             Recognition](https://arxiv.org/abs/1904.08779).
         mask_time_prob (`float`, *optional*, defaults to 0.05):
@@ -122,7 +122,7 @@ class WavLMConfig(PretrainedConfig):
         contrastive_logits_temperature (`float`, *optional*, defaults to 0.1):
             The temperature *kappa* in the contrastive loss.
         feat_quantizer_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout probabilitiy for the output of the feature extractor that's used by the quantizer.
+            The dropout probability for the output of the feature encoder that's used by the quantizer.
         num_negatives (`int`, *optional*, defaults to 100):
             Number of negative samples for the contrastive loss.
         codevector_dim (`int`, *optional*, defaults to 256):
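The three conv tuples together determine how aggressively the feature encoder downsamples raw audio: each layer maps a sequence length L to floor((L - kernel) / stride) + 1. A standalone sketch of that arithmetic, using the default values quoted above (it mirrors the model's internal `_get_feat_extract_output_lengths` helper):

```python
def feat_extract_output_length(
    input_length: int,
    conv_kernel=(10, 3, 3, 3, 3, 3, 3),
    conv_stride=(5, 2, 2, 2, 2, 2, 2),
) -> int:
    """Number of frames the conv feature encoder emits for a raw waveform."""
    for kernel, stride in zip(conv_kernel, conv_stride):
        input_length = (input_length - kernel) // stride + 1
    return input_length

print(feat_extract_output_length(16000))  # one second at 16 kHz -> 49 frames
```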
@@ -15,6 +15,7 @@
 """ PyTorch WavLM model."""

 import math
+import warnings
 from dataclasses import dataclass
 from typing import Optional, Tuple, Union
@@ -352,8 +353,8 @@ class WavLMSamePadLayer(nn.Module):
         return hidden_states


-# Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2FeatureExtractor with Wav2Vec2->WavLM
-class WavLMFeatureExtractor(nn.Module):
+# Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2FeatureEncoder with Wav2Vec2->WavLM
+class WavLMFeatureEncoder(nn.Module):
     """Construct the features from raw audio waveform"""

     def __init__(self, config):
@@ -404,6 +405,17 @@ class WavLMFeatureExtractor(nn.Module):
         return hidden_states


+class WavLMFeatureExtractor(WavLMFeatureEncoder):
+    def __init__(self, config):
+        super().__init__(config)
+        warnings.warn(
+            f"The class `{self.__class__.__name__}` has been deprecated "
+            "and will be removed in Transformers v5. "
+            f"Use `{self.__class__.__bases__[0].__name__}` instead.",
+            FutureWarning,
+        )
+
+
 # Copied from transformers.models.wav2vec2.modeling_wav2vec2.Wav2Vec2FeatureProjection with Wav2Vec2->WavLM
 class WavLMFeatureProjection(nn.Module):
     def __init__(self, config):
@@ -1077,7 +1089,7 @@ class WavLMPreTrainedModel(PreTrainedModel):
         return attention_mask

     def _set_gradient_checkpointing(self, module, value=False):
-        if isinstance(module, (WavLMEncoder, WavLMEncoderStableLayerNorm, WavLMFeatureExtractor)):
+        if isinstance(module, (WavLMEncoder, WavLMEncoderStableLayerNorm, WavLMFeatureEncoder)):
             module.gradient_checkpointing = value
@@ -1146,7 +1158,7 @@ class WavLMModel(WavLMPreTrainedModel):
     def __init__(self, config: WavLMConfig):
         super().__init__(config)
         self.config = config
-        self.feature_extractor = WavLMFeatureExtractor(config)
+        self.feature_extractor = WavLMFeatureEncoder(config)
         self.feature_projection = WavLMFeatureProjection(config)

         # model only needs masking vector if mask prob is > 0.0
@@ -1165,8 +1177,20 @@ class WavLMModel(WavLMPreTrainedModel):
     def freeze_feature_extractor(self):
         """
-        Calling this function will disable the gradient computation for the feature extractor so that its parameters
-        will not be updated during training.
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
         """
+        warnings.warn(
+            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
+            "Please use the equivalent `freeze_feature_encoder` method instead.",
+            FutureWarning,
+        )
+        self.freeze_feature_encoder()
+
+    def freeze_feature_encoder(self):
+        """
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
+        """
         self.feature_extractor._freeze_parameters()
@@ -1303,8 +1327,20 @@ class WavLMForCTC(WavLMPreTrainedModel):
     def freeze_feature_extractor(self):
         """
-        Calling this function will disable the gradient computation for the feature extractor so that its parameter
-        will not be updated during training.
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
         """
+        warnings.warn(
+            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
+            "Please use the equivalent `freeze_feature_encoder` method instead.",
+            FutureWarning,
+        )
+        self.freeze_feature_encoder()
+
+    def freeze_feature_encoder(self):
+        """
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
+        """
         self.wavlm.feature_extractor._freeze_parameters()
@@ -1412,8 +1448,20 @@ class WavLMForSequenceClassification(WavLMPreTrainedModel):
     def freeze_feature_extractor(self):
        """
-        Calling this function will disable the gradient computation for the feature extractor so that its parameters
-        will not be updated during training.
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
         """
+        warnings.warn(
+            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
+            "Please use the equivalent `freeze_feature_encoder` method instead.",
+            FutureWarning,
+        )
+        self.freeze_feature_encoder()
+
+    def freeze_feature_encoder(self):
+        """
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
+        """
         self.wavlm.feature_extractor._freeze_parameters()
@@ -1516,8 +1564,20 @@ class WavLMForAudioFrameClassification(WavLMPreTrainedModel):
     def freeze_feature_extractor(self):
         """
-        Calling this function will disable the gradient computation for the feature extractor so that its parameters
-        will not be updated during training.
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
         """
+        warnings.warn(
+            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
+            "Please use the equivalent `freeze_feature_encoder` method instead.",
+            FutureWarning,
+        )
+        self.freeze_feature_encoder()
+
+    def freeze_feature_encoder(self):
+        """
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
+        """
         self.wavlm.feature_extractor._freeze_parameters()
@@ -1665,8 +1725,20 @@ class WavLMForXVector(WavLMPreTrainedModel):
     def freeze_feature_extractor(self):
         """
-        Calling this function will disable the gradient computation for the feature extractor so that its parameters
-        will not be updated during training.
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
         """
+        warnings.warn(
+            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
+            "Please use the equivalent `freeze_feature_encoder` method instead.",
+            FutureWarning,
+        )
+        self.freeze_feature_encoder()
+
+    def freeze_feature_encoder(self):
+        """
+        Calling this function will disable the gradient computation for the feature encoder so that its parameters
+        will not be updated during training.
+        """
         self.wavlm.feature_extractor._freeze_parameters()
@@ -225,7 +225,7 @@ class HubertModelTester:
         model.train()

         # freeze feature encoder
-        model.freeze_feature_extractor()
+        model.freeze_feature_encoder()

         input_values = input_values[:3]
@@ -203,7 +203,7 @@ class SEWModelTester:
         model.train()

         # freeze feature encoder
-        model.freeze_feature_extractor()
+        model.freeze_feature_encoder()

         input_values = input_values[:3]
@@ -224,7 +224,7 @@ class SEWDModelTester:
         model.train()

         # freeze feature encoder
-        model.freeze_feature_extractor()
+        model.freeze_feature_encoder()

         input_values = input_values[:3]
@@ -184,7 +184,7 @@ class TFHubertModelTester:
         model = TFHubertForCTC(config)

         # freeze feature encoder
-        model.freeze_feature_extractor()
+        model.freeze_feature_encoder()

         input_values = input_values[:3]
@@ -194,7 +194,7 @@ class TFWav2Vec2ModelTester:
         model = TFWav2Vec2ForCTC(config)

         # freeze feature encoder
-        model.freeze_feature_extractor()
+        model.freeze_feature_encoder()

         input_values = input_values[:3]
@@ -226,7 +226,7 @@ class UniSpeechModelTester:
         model.train()

         # freeze feature encoder
-        model.freeze_feature_extractor()
+        model.freeze_feature_encoder()

         input_values = input_values[:3]
@@ -246,7 +246,7 @@ class UniSpeechSatModelTester:
         model.train()

         # freeze feature encoder
-        model.freeze_feature_extractor()
+        model.freeze_feature_encoder()

         input_values = input_values[:3]
@@ -300,7 +300,7 @@ class Wav2Vec2ModelTester:
         model.train()

         # freeze feature encoder
-        model.freeze_feature_extractor()
+        model.freeze_feature_encoder()

         input_values = input_values[:3]
@@ -238,7 +238,7 @@ class WavLMModelTester:
         model.train()

         # freeze feature encoder
-        model.freeze_feature_extractor()
+        model.freeze_feature_encoder()

         input_values = input_values[:3]
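All the testers follow the same recipe: call the (renamed) freeze method, run one training step, and expect the frozen encoder to accumulate no gradients. A self-contained sketch of that check; the tiny config values are illustrative, not the testers' exact settings:

```python
import torch
from transformers import Wav2Vec2Config, Wav2Vec2ForCTC

config = Wav2Vec2Config(
    hidden_size=32, num_hidden_layers=2, num_attention_heads=2,
    intermediate_size=64, vocab_size=32,
)
model = Wav2Vec2ForCTC(config)
model.train()
model.freeze_feature_encoder()

input_values = torch.randn(2, 16000)
labels = torch.randint(1, config.vocab_size, (2, 10))  # avoid the blank index 0
model(input_values, labels=labels).loss.backward()

# the frozen feature encoder received no gradients
assert all(p.grad is None for p in model.wav2vec2.feature_extractor.parameters())
```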