"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "91ff480e2693f36b11aaebc4e9cc79e4e3c049da"
Unverified commit 53cffeb3 authored by Chi, committed by GitHub

Enhancing Code Readability and Maintainability with Simplified Activation Function Selection (#28349)

* Slightly change the code in get_activation()

* Define the gelu activation in the proper place in these two files

* Fix GitHub issue

* Fix some typos

* Fix my mistaken use of self to call the config

* Reformat the two files

* Update src/transformers/activations.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/models/electra/modeling_electra.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Update src/transformers/models/convbert/modeling_convbert.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

* Rename gelu_act to activation

---------
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
parent 3eddda11
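For context, here is a minimal sketch of the pattern this commit applies: resolve the activation once with get_activation() in __init__ and reuse the cached callable in forward(), instead of looking it up by name on every pass. The GeneratorHeadExample module below is a hypothetical stand-in, not code from this diff, and assumes only the public transformers.activations.get_activation API.

import torch
from torch import nn
from transformers.activations import get_activation


class GeneratorHeadExample(nn.Module):
    """Hypothetical module illustrating the refactor; not part of the commit."""

    def __init__(self, hidden_size: int, embedding_size: int):
        super().__init__()
        self.dense = nn.Linear(hidden_size, embedding_size)
        self.activation = get_activation("gelu")  # resolved once here (new pattern)
        self.LayerNorm = nn.LayerNorm(embedding_size)

    def forward(self, hidden_states: torch.FloatTensor) -> torch.FloatTensor:
        hidden_states = self.dense(hidden_states)
        # reuse the cached callable instead of calling get_activation("gelu") on every forward pass
        hidden_states = self.activation(hidden_states)
        return self.LayerNorm(hidden_states)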
src/transformers/models/convbert/modeling_convbert.py
@@ -856,12 +856,13 @@ class ConvBertGeneratorPredictions(nn.Module):

     def __init__(self, config):
         super().__init__()

+        self.activation = get_activation("gelu")
         self.LayerNorm = nn.LayerNorm(config.embedding_size, eps=config.layer_norm_eps)
         self.dense = nn.Linear(config.hidden_size, config.embedding_size)

     def forward(self, generator_hidden_states: torch.FloatTensor) -> torch.FloatTensor:
         hidden_states = self.dense(generator_hidden_states)
-        hidden_states = get_activation("gelu")(hidden_states)
+        hidden_states = self.activation(hidden_states)
         hidden_states = self.LayerNorm(hidden_states)
         return hidden_states
src/transformers/models/electra/modeling_electra.py
@@ -631,12 +631,13 @@ class ElectraDiscriminatorPredictions(nn.Module):
         super().__init__()

         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
+        self.activation = get_activation(config.hidden_act)
         self.dense_prediction = nn.Linear(config.hidden_size, 1)
         self.config = config

     def forward(self, discriminator_hidden_states):
         hidden_states = self.dense(discriminator_hidden_states)
-        hidden_states = get_activation(self.config.hidden_act)(hidden_states)
+        hidden_states = self.activation(hidden_states)
         logits = self.dense_prediction(hidden_states).squeeze(-1)
         return logits
@@ -648,12 +649,13 @@ class ElectraGeneratorPredictions(nn.Module):

     def __init__(self, config):
         super().__init__()

+        self.activation = get_activation("gelu")
         self.LayerNorm = nn.LayerNorm(config.embedding_size, eps=config.layer_norm_eps)
         self.dense = nn.Linear(config.hidden_size, config.embedding_size)

     def forward(self, generator_hidden_states):
         hidden_states = self.dense(generator_hidden_states)
-        hidden_states = get_activation("gelu")(hidden_states)
+        hidden_states = self.activation(hidden_states)
         hidden_states = self.LayerNorm(hidden_states)
         return hidden_states
@@ -933,6 +935,7 @@ class ElectraClassificationHead(nn.Module):
         classifier_dropout = (
             config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
         )
+        self.activation = get_activation("gelu")
         self.dropout = nn.Dropout(classifier_dropout)
         self.out_proj = nn.Linear(config.hidden_size, config.num_labels)

@@ -940,7 +943,7 @@ class ElectraClassificationHead(nn.Module):
         x = features[:, 0, :]  # take <s> token (equiv. to [CLS])
         x = self.dropout(x)
         x = self.dense(x)
-        x = get_activation("gelu")(x)  # although BERT uses tanh here, it seems Electra authors used gelu here
+        x = self.activation(x)  # although BERT uses tanh here, it seems Electra authors used gelu here
         x = self.dropout(x)
         x = self.out_proj(x)
         return x
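As a quick aside, a small sanity check (not part of the commit) that caching the activation in __init__ is behaviorally identical to resolving it inside forward(); it assumes only the public get_activation API and the "gelu" mapping.

import torch
from transformers.activations import get_activation

x = torch.randn(2, 8)
cached = get_activation("gelu")        # resolved once, as in the updated __init__ methods
old_style = get_activation("gelu")(x)  # resolved per call, as in the previous forward methods
assert torch.allclose(cached(x), old_style)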