"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "4fc9f9ef54e2ab250042c55b55a2e3c097858cb7"
Unverified Commit 5c8d941d authored by Thien Tran, committed by GitHub

Use Conv1d for TDNN (#25728)

* use conv for tdnn

* run make fixup

* update TDNN

* add PEFT LoRA check

* propagate tdnn warnings to others

* add missing imports

* update TDNN in wav2vec2_bert

* add missing imports
parent 866253f8
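
The core of the change, repeated across the six modeling files below, is to keep the existing `nn.Linear` weights of `TDNNLayer` for backward compatibility while running the forward pass through `nn.functional.conv1d` instead of materializing unfolded windows. A minimal standalone sketch of why the two paths agree (the sizes below are illustrative assumptions, not taken from any real config):

```python
import torch
import torch.nn as nn

# Illustrative sizes (assumptions, not from a real checkpoint config)
in_conv_dim, out_conv_dim, kernel_size, dilation = 512, 512, 5, 3
kernel = nn.Linear(in_conv_dim * kernel_size, out_conv_dim)

x = torch.randn(2, 97, in_conv_dim)  # (batch, time, features)

# Old path: unfold dilated time windows, then apply the Linear layer
unfolded = nn.functional.unfold(
    x.unsqueeze(1),
    (kernel_size, in_conv_dim),
    stride=(1, in_conv_dim),
    dilation=(dilation, 1),
).transpose(1, 2)
old_out = kernel(unfolded)

# New path: reinterpret the Linear weight as a Conv1d kernel of shape
# (out_channels, in_channels, kernel_size) and convolve directly
weight = kernel.weight.view(out_conv_dim, kernel_size, in_conv_dim).transpose(1, 2)
new_out = nn.functional.conv1d(
    x.transpose(1, 2), weight, kernel.bias, dilation=dilation
).transpose(1, 2)

print(torch.allclose(old_out, new_out, atol=1e-5))  # True, up to float rounding
```

The conv1d call avoids the large intermediate unfold tensor, so checkpoints keep their `kernel.weight`/`kernel.bias` layout while the forward pass gets the speed-up noted in the diff comment.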
@@ -35,7 +35,13 @@ from ...modeling_outputs import (
     XVectorOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
+from ...utils import (
+    add_code_sample_docstrings,
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+    is_peft_available,
+    logging,
+)
 from .configuration_data2vec_audio import Data2VecAudioConfig
@@ -1342,16 +1348,21 @@ class TDNNLayer(nn.Module):
         self.kernel = nn.Linear(self.in_conv_dim * self.kernel_size, self.out_conv_dim)
         self.activation = nn.ReLU()
 
-    def forward(self, hidden_states):
-        hidden_states = hidden_states.unsqueeze(1)
-        hidden_states = nn.functional.unfold(
-            hidden_states,
-            (self.kernel_size, self.in_conv_dim),
-            stride=(1, self.in_conv_dim),
-            dilation=(self.dilation, 1),
-        )
-        hidden_states = hidden_states.transpose(1, 2)
-        hidden_states = self.kernel(hidden_states)
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        if is_peft_available():
+            from peft.tuners.lora import LoraLayer
+
+            if isinstance(self.kernel, LoraLayer):
+                warnings.warn(
+                    "Detected LoRA on TDNNLayer. LoRA weights won't be applied due to optimization. "
+                    "You should exclude TDNNLayer from LoRA's target modules.",
+                )
+
+        # for backward compatibility, we keep nn.Linear but call F.conv1d for speed up
+        hidden_states = hidden_states.transpose(1, 2)
+        weight = self.kernel.weight.view(self.out_conv_dim, self.kernel_size, self.in_conv_dim).transpose(1, 2)
+        hidden_states = nn.functional.conv1d(hidden_states, weight, self.kernel.bias, dilation=self.dilation)
+        hidden_states = hidden_states.transpose(1, 2)
 
         hidden_states = self.activation(hidden_states)
         return hidden_states
...
@@ -41,6 +41,7 @@ from ...utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
+    is_peft_available,
     logging,
     replace_return_docstrings,
 )
@@ -1796,16 +1797,21 @@ class TDNNLayer(nn.Module):
         self.kernel = nn.Linear(self.in_conv_dim * self.kernel_size, self.out_conv_dim)
         self.activation = nn.ReLU()
 
-    def forward(self, hidden_states):
-        hidden_states = hidden_states.unsqueeze(1)
-        hidden_states = nn.functional.unfold(
-            hidden_states,
-            (self.kernel_size, self.in_conv_dim),
-            stride=(1, self.in_conv_dim),
-            dilation=(self.dilation, 1),
-        )
-        hidden_states = hidden_states.transpose(1, 2)
-        hidden_states = self.kernel(hidden_states)
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        if is_peft_available():
+            from peft.tuners.lora import LoraLayer
+
+            if isinstance(self.kernel, LoraLayer):
+                warnings.warn(
+                    "Detected LoRA on TDNNLayer. LoRA weights won't be applied due to optimization. "
+                    "You should exclude TDNNLayer from LoRA's target modules.",
+                )
+
+        # for backward compatibility, we keep nn.Linear but call F.conv1d for speed up
+        hidden_states = hidden_states.transpose(1, 2)
+        weight = self.kernel.weight.view(self.out_conv_dim, self.kernel_size, self.in_conv_dim).transpose(1, 2)
+        hidden_states = nn.functional.conv1d(hidden_states, weight, self.kernel.bias, dilation=self.dilation)
+        hidden_states = hidden_states.transpose(1, 2)
 
         hidden_states = self.activation(hidden_states)
         return hidden_states
...
@@ -44,6 +44,7 @@ from ...utils import (
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
     cached_file,
+    is_peft_available,
     is_safetensors_available,
     logging,
     replace_return_docstrings,
@@ -2287,16 +2288,21 @@ class TDNNLayer(nn.Module):
         self.kernel = nn.Linear(self.in_conv_dim * self.kernel_size, self.out_conv_dim)
         self.activation = nn.ReLU()
 
-    def forward(self, hidden_states):
-        hidden_states = hidden_states.unsqueeze(1)
-        hidden_states = nn.functional.unfold(
-            hidden_states,
-            (self.kernel_size, self.in_conv_dim),
-            stride=(1, self.in_conv_dim),
-            dilation=(self.dilation, 1),
-        )
-        hidden_states = hidden_states.transpose(1, 2)
-        hidden_states = self.kernel(hidden_states)
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        if is_peft_available():
+            from peft.tuners.lora import LoraLayer
+
+            if isinstance(self.kernel, LoraLayer):
+                warnings.warn(
+                    "Detected LoRA on TDNNLayer. LoRA weights won't be applied due to optimization. "
+                    "You should exclude TDNNLayer from LoRA's target modules.",
+                )
+
+        # for backward compatibility, we keep nn.Linear but call F.conv1d for speed up
+        hidden_states = hidden_states.transpose(1, 2)
+        weight = self.kernel.weight.view(self.out_conv_dim, self.kernel_size, self.in_conv_dim).transpose(1, 2)
+        hidden_states = nn.functional.conv1d(hidden_states, weight, self.kernel.bias, dilation=self.dilation)
+        hidden_states = hidden_states.transpose(1, 2)
 
         hidden_states = self.activation(hidden_states)
         return hidden_states
...
@@ -15,6 +15,7 @@
 """ PyTorch Wav2Vec2-BERT model."""
 
 import math
+import warnings
 from typing import Optional, Tuple, Union
 
 import numpy as np
@@ -39,6 +40,7 @@ from ...utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
+    is_peft_available,
     logging,
 )
 from .configuration_wav2vec2_bert import Wav2Vec2BertConfig
@@ -1516,16 +1518,21 @@ class TDNNLayer(nn.Module):
         self.kernel = nn.Linear(self.in_conv_dim * self.kernel_size, self.out_conv_dim)
         self.activation = nn.ReLU()
 
-    def forward(self, hidden_states):
-        hidden_states = hidden_states.unsqueeze(1)
-        hidden_states = nn.functional.unfold(
-            hidden_states,
-            (self.kernel_size, self.in_conv_dim),
-            stride=(1, self.in_conv_dim),
-            dilation=(self.dilation, 1),
-        )
-        hidden_states = hidden_states.transpose(1, 2)
-        hidden_states = self.kernel(hidden_states)
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        if is_peft_available():
+            from peft.tuners.lora import LoraLayer
+
+            if isinstance(self.kernel, LoraLayer):
+                warnings.warn(
+                    "Detected LoRA on TDNNLayer. LoRA weights won't be applied due to optimization. "
+                    "You should exclude TDNNLayer from LoRA's target modules.",
+                )
+
+        # for backward compatibility, we keep nn.Linear but call F.conv1d for speed up
+        hidden_states = hidden_states.transpose(1, 2)
+        weight = self.kernel.weight.view(self.out_conv_dim, self.kernel_size, self.in_conv_dim).transpose(1, 2)
+        hidden_states = nn.functional.conv1d(hidden_states, weight, self.kernel.bias, dilation=self.dilation)
+        hidden_states = hidden_states.transpose(1, 2)
 
         hidden_states = self.activation(hidden_states)
         return hidden_states
...
@@ -15,6 +15,7 @@
 """ PyTorch Wav2Vec2-Conformer model."""
 
 import math
+import warnings
 from dataclasses import dataclass
 from typing import Optional, Tuple, Union
@@ -40,6 +41,7 @@ from ...utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
+    is_peft_available,
     logging,
     replace_return_docstrings,
 )
@@ -1948,16 +1950,21 @@ class TDNNLayer(nn.Module):
         self.kernel = nn.Linear(self.in_conv_dim * self.kernel_size, self.out_conv_dim)
         self.activation = nn.ReLU()
 
-    def forward(self, hidden_states):
-        hidden_states = hidden_states.unsqueeze(1)
-        hidden_states = nn.functional.unfold(
-            hidden_states,
-            (self.kernel_size, self.in_conv_dim),
-            stride=(1, self.in_conv_dim),
-            dilation=(self.dilation, 1),
-        )
-        hidden_states = hidden_states.transpose(1, 2)
-        hidden_states = self.kernel(hidden_states)
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        if is_peft_available():
+            from peft.tuners.lora import LoraLayer
+
+            if isinstance(self.kernel, LoraLayer):
+                warnings.warn(
+                    "Detected LoRA on TDNNLayer. LoRA weights won't be applied due to optimization. "
+                    "You should exclude TDNNLayer from LoRA's target modules.",
+                )
+
+        # for backward compatibility, we keep nn.Linear but call F.conv1d for speed up
+        hidden_states = hidden_states.transpose(1, 2)
+        weight = self.kernel.weight.view(self.out_conv_dim, self.kernel_size, self.in_conv_dim).transpose(1, 2)
+        hidden_states = nn.functional.conv1d(hidden_states, weight, self.kernel.bias, dilation=self.dilation)
+        hidden_states = hidden_states.transpose(1, 2)
 
         hidden_states = self.activation(hidden_states)
         return hidden_states
...
@@ -36,7 +36,13 @@ from ...modeling_outputs import (
     XVectorOutput,
 )
 from ...modeling_utils import PreTrainedModel
-from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
+from ...utils import (
+    add_code_sample_docstrings,
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+    is_peft_available,
+    logging,
+)
 from .configuration_wavlm import WavLMConfig
@@ -1674,16 +1680,21 @@ class TDNNLayer(nn.Module):
         self.kernel = nn.Linear(self.in_conv_dim * self.kernel_size, self.out_conv_dim)
         self.activation = nn.ReLU()
 
-    def forward(self, hidden_states):
-        hidden_states = hidden_states.unsqueeze(1)
-        hidden_states = nn.functional.unfold(
-            hidden_states,
-            (self.kernel_size, self.in_conv_dim),
-            stride=(1, self.in_conv_dim),
-            dilation=(self.dilation, 1),
-        )
-        hidden_states = hidden_states.transpose(1, 2)
-        hidden_states = self.kernel(hidden_states)
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        if is_peft_available():
+            from peft.tuners.lora import LoraLayer
+
+            if isinstance(self.kernel, LoraLayer):
+                warnings.warn(
+                    "Detected LoRA on TDNNLayer. LoRA weights won't be applied due to optimization. "
+                    "You should exclude TDNNLayer from LoRA's target modules.",
+                )
+
+        # for backward compatibility, we keep nn.Linear but call F.conv1d for speed up
+        hidden_states = hidden_states.transpose(1, 2)
+        weight = self.kernel.weight.view(self.out_conv_dim, self.kernel_size, self.in_conv_dim).transpose(1, 2)
+        hidden_states = nn.functional.conv1d(hidden_states, weight, self.kernel.bias, dilation=self.dilation)
+        hidden_states = hidden_states.transpose(1, 2)
 
         hidden_states = self.activation(hidden_states)
        return hidden_states
...
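
Because the conv1d fast path reads `self.kernel.weight` directly, any LoRA adapter wrapped around `self.kernel` would be silently skipped, which is what the new warning flags. A hedged sketch of keeping `TDNNLayer` out of the adapter, assuming PEFT's `LoraConfig` and an x-vector head on a base checkpoint (the checkpoint name and the `q_proj`/`v_proj` target names are illustrative choices, not prescribed by this commit):

```python
from peft import LoraConfig, get_peft_model
from transformers import Wav2Vec2ForXVector

# Base checkpoint used only for illustration; the x-vector head is newly initialized
model = Wav2Vec2ForXVector.from_pretrained("facebook/wav2vec2-base")

# Target the attention projections and leave the TDNN "kernel" Linear layers
# alone, so their LoRA weights are never silently ignored by the conv1d path.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
)
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()
```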