"...git@developer.sourcefind.cn:OpenDAS/diffusers.git" did not exist on "9eb389f298714b1d63458a9553778e135cca562a"
Unverified commit 98122794 authored by JuheonChu, committed by GitHub

Replace assertions with value errors on distilbert model (#20463)



* Changed 7-8 assert statements into exceptions

* fixed a syntax error

* updated error

* updated file (Co-author: Batese2001)

* Successful test on test_modeling_distilbert.py

The revised code raises the expected errors and exceptions in test_modeling_distilbert.py.

Co-credit: @batese2001

* Delete test_modeling_distilbert.ipynb

* Update modeling_distilbert.py

* Raise exceptions when the conditions contrary to the original assert statements are met (Co-author: Batese2001)

* committing the reformatted distilbert model

* reformatted distilbert model with black

* Changed comments to better explain that an exception is raised when the number of attention heads does not evenly divide the dimensions

* changed based on the feedback

* Changed line 833 based on the suggestion made by @younesbelkada

* Changed line 833 based on the suggestion made by @younesbelkada (draft 2)

* reformatted file

* Update src/transformers/models/distilbert/modeling_distilbert.py

* Update src/transformers/models/distilbert/modeling_distilbert.py
Co-authored-by: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
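
For background on why the change matters: Python strips `assert` statements when run with the `-O` flag, so an invalid configuration would slip through silently in optimized mode, whereas an explicit `raise ValueError(...)` is always enforced and carries an actionable message. Below is a minimal sketch of the pattern this commit applies; `check_heads_assert` and `check_heads_raise` are hypothetical helpers for illustration, not code from the diff (the names `dim` and `n_heads` mirror the DistilBERT config fields):

def check_heads_assert(dim: int, n_heads: int) -> None:
    # Silently skipped under `python -O`, letting a bad config through.
    assert dim % n_heads == 0

def check_heads_raise(dim: int, n_heads: int) -> None:
    # Always enforced, with a message that names the offending values.
    if dim % n_heads != 0:
        raise ValueError(f"n_heads: {n_heads} must divide dim: {dim} evenly")

check_heads_raise(768, 12)  # ok: 768 % 12 == 0
try:
    check_heads_raise(768, 10)  # 768 % 10 != 0
except ValueError as e:
    print(e)  # n_heads: 10 must divide dim: 768 evenly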
parent 134a8e21
@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 PyTorch DistilBERT model adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM) and in
 part from HuggingFace PyTorch version of Google AI Bert model (https://github.com/google-research/bert)
@@ -141,7 +142,10 @@ class MultiHeadSelfAttention(nn.Module):
         self.dim = config.dim
         self.dropout = nn.Dropout(p=config.attention_dropout)
-        assert self.dim % self.n_heads == 0
+        # The number of attention heads must evenly divide the hidden dimension
+        if self.dim % self.n_heads != 0:
+            # Raise a ValueError when n_heads does not evenly divide dim
+            raise ValueError(f"self.n_heads: {self.n_heads} must divide self.dim: {self.dim} evenly")
         self.q_lin = nn.Linear(in_features=config.dim, out_features=config.dim)
         self.k_lin = nn.Linear(in_features=config.dim, out_features=config.dim)
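
To see the new behavior from this hunk (an illustration, not part of the diff; it assumes a build of `transformers` that includes this commit): constructing the attention module with a `dim` that `n_heads` does not divide now raises the `ValueError` above instead of a bare `AssertionError`.

from transformers import DistilBertConfig
from transformers.models.distilbert.modeling_distilbert import MultiHeadSelfAttention

# dim=10 is not divisible by n_heads=3, so __init__ should raise the new ValueError.
try:
    MultiHeadSelfAttention(DistilBertConfig(dim=10, n_heads=3))
except ValueError as e:
    print(e)  # self.n_heads: 3 must divide self.dim: 10 evenly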
@@ -255,7 +259,9 @@ class TransformerBlock(nn.Module):
     def __init__(self, config: PretrainedConfig):
         super().__init__()
-        assert config.dim % config.n_heads == 0
+        # The number of attention heads must evenly divide the hidden dimension
+        if config.dim % config.n_heads != 0:
+            raise ValueError(f"config.n_heads {config.n_heads} must divide config.dim {config.dim} evenly")
         self.attention = MultiHeadSelfAttention(config)
         self.sa_layer_norm = nn.LayerNorm(normalized_shape=config.dim, eps=1e-12)
@@ -291,7 +297,9 @@ class TransformerBlock(nn.Module):
         if output_attentions:
             sa_output, sa_weights = sa_output  # (bs, seq_length, dim), (bs, n_heads, seq_length, seq_length)
         else:  # To handle these `output_attentions` or `output_hidden_states` cases returning tuples
-            assert type(sa_output) == tuple
+            if type(sa_output) != tuple:
+                raise TypeError(f"sa_output must be a tuple but it is {type(sa_output)} type")
             sa_output = sa_output[0]
         sa_output = self.sa_layer_norm(sa_output + x)  # (bs, seq_length, dim)
@@ -320,6 +328,7 @@ class Transformer(nn.Module):
         output_hidden_states: bool = False,
         return_dict: Optional[bool] = None,
     ) -> Union[BaseModelOutput, Tuple[torch.Tensor, ...]]:  # docstyle-ignore
         """
         Parameters:
             x: torch.tensor(bs, seq_length, dim) Input sequence embedded.
@@ -348,11 +357,14 @@
             hidden_state = layer_outputs[-1]
             if output_attentions:
-                assert len(layer_outputs) == 2
+                if len(layer_outputs) != 2:
+                    raise ValueError(f"The length of the layer_outputs should be 2, but it is {len(layer_outputs)}")
                 attentions = layer_outputs[0]
                 all_attentions = all_attentions + (attentions,)
             else:
-                assert len(layer_outputs) == 1
+                if len(layer_outputs) != 1:
+                    raise ValueError(f"The length of the layer_outputs should be 1, but it is {len(layer_outputs)}")
         # Add last layer
         if output_hidden_states:
@@ -810,7 +822,9 @@ class DistilBertForQuestionAnswering(DistilBertPreTrainedModel):
         self.distilbert = DistilBertModel(config)
         self.qa_outputs = nn.Linear(config.dim, config.num_labels)
-        assert config.num_labels == 2
+        if config.num_labels != 2:
+            raise ValueError(f"config.num_labels should be 2, but it is {config.num_labels}")
         self.dropout = nn.Dropout(config.qa_dropout)
         # Initialize weights and apply final processing
...
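
Likewise, the question-answering head predicts exactly two logits per token (the start and end of the answer span), so any other `num_labels` now fails fast with a readable error. Again an illustration, not part of the diff, assuming a build of `transformers` that includes this commit:

from transformers import DistilBertConfig, DistilBertForQuestionAnswering

# num_labels=3 violates the start/end-logit contract, so __init__ raises.
try:
    DistilBertForQuestionAnswering(DistilBertConfig(num_labels=3))
except ValueError as e:
    print(e)  # config.num_labels should be 2, but it is 3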