Unverified Commit cc034f72 authored by Anmol Joshi, committed by GitHub

Replace assertion with exception (#16720)



* Updated assertions to exceptions

* updated assertions to exceptions

* bug fixes

* fix-copies

* Update modeling_ctrl.py

* Update src/transformers/models/ctrl/modeling_tf_ctrl.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/gpt_neo/modeling_gpt_neo.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/gptj/modeling_gptj.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/gptj/modeling_tf_gptj.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update modeling_led.py

* Update modeling_led.py

* Update modeling_led.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 14daa610
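
The commit applies one pattern across all touched files: input checks written as `assert` statements (which are skipped under `python -O` and only ever raise the generic `AssertionError`) become explicit checks that raise a typed exception such as `ValueError`, `TypeError`, `KeyError`, `AttributeError`, or `FileNotFoundError`. A minimal sketch of the pattern, using an illustrative condition and message rather than code from any particular file:

-    assert some_condition, "helpful error message"
+    if not some_condition:
+        raise ValueError("helpful error message")
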
@@ -145,7 +145,8 @@ def load_tf_weights_in_big_bird(model, tf_checkpoint_path, is_trivia_qa=False):
     # Load weights from TF model
     init_vars = tf.saved_model.load(tf_path).variables if is_trivia_qa else tf.train.list_variables(tf_path)
-    assert len(init_vars) > 0, "Loaded trained variables cannot be empty."
+    if len(init_vars) <= 0:
+        raise ValueError("Loaded trained variables cannot be empty.")
     pt_names = list(model.state_dict().keys())
@@ -460,8 +461,11 @@ class BigBirdBlockSparseAttention(nn.Module):
         to_seq_length = from_seq_length = seqlen
         from_block_size = to_block_size = self.block_size
-        assert from_seq_length % from_block_size == 0, "Query sided sequence length must be multiple of block size"
-        assert to_seq_length % to_block_size == 0, "Key/Value sided sequence length must be multiple of block size"
+        if from_seq_length % from_block_size != 0:
+            raise ValueError("Query sided sequence length must be multiple of block size")
+        if to_seq_length % to_block_size != 0:
+            raise ValueError("Key/Value sided sequence length must be multiple of block size")
         query_layer = self.transpose_for_scores(self.query(hidden_states))
         key_layer = self.transpose_for_scores(self.key(hidden_states))
@@ -1077,9 +1081,8 @@ class BigBirdBlockSparseAttention(nn.Module):
         """
         # using this method when from_seq_length in [1024, 3072, 4096]
-        assert (
-            from_seq_length // from_block_size == to_seq_length // to_block_size
-        ), "Error the number of blocks needs to be same!"
+        if from_seq_length // from_block_size != to_seq_length // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
         rand_attn = np.zeros((from_seq_length // from_block_size - 2, num_rand_blocks), dtype=np.int32)
         middle_seq = np.arange(1, to_seq_length // to_block_size - 1, dtype=np.int32)
@@ -1153,11 +1156,11 @@ class BigBirdBlockSparseAttention(nn.Module):
         """
         # using this method when from_seq_length not in [1024, 3072, 4096]
-        assert (
-            from_seq_length // from_block_size == to_seq_length // to_block_size
-        ), "Error the number of blocks needs to be same!"
+        if from_seq_length // from_block_size != to_seq_length // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
-        assert from_seq_length in plan_from_length, "Error from sequence length not in plan!"
+        if from_seq_length not in plan_from_length:
+            raise ValueError("Error from sequence length not in plan!")
         # Total number of blocks in the mmask
         num_blocks = from_seq_length // from_block_size
@@ -1397,9 +1400,8 @@ class BigBirdAttention(nn.Module):
                 output_attentions,
             )
         else:
-            assert (
-                encoder_hidden_states is None
-            ), "BigBird cannot be used as a decoder when config.attention_type != 'original_full'"
+            if encoder_hidden_states is not None:
+                raise ValueError("BigBird cannot be used as a decoder when config.attention_type != 'original_full'")
             self_outputs = self.self(
                 hidden_states, band_mask, from_mask, to_mask, from_blocked_mask, to_blocked_mask, output_attentions
             )
@@ -1451,7 +1453,8 @@ class BigBirdLayer(nn.Module):
         self.is_decoder = config.is_decoder
         self.add_cross_attention = config.add_cross_attention
         if self.add_cross_attention:
-            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
+            if not self.is_decoder:
+                raise TypeError(f"{self} should be used as a decoder model if cross attention is added")
             self.crossattention = BigBirdAttention(config)
         self.intermediate = BigBirdIntermediate(config)
         self.output = BigBirdOutput(config)
@@ -2183,9 +2186,10 @@ class BigBirdModel(BigBirdPreTrainedModel):
     def create_masks_for_block_sparse_attn(attention_mask: torch.Tensor, block_size: int):
         batch_size, seq_length = attention_mask.size()
-        assert (
-            seq_length % block_size == 0
-        ), f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}."
+        if seq_length % block_size != 0:
+            raise ValueError(
+                f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}."
+            )
         def create_band_mask_from_inputs(from_blocked_mask, to_blocked_mask):
             """
@@ -2454,7 +2458,8 @@ class BigBirdForMaskedLM(BigBirdPreTrainedModel):
         effective_batch_size = input_shape[0]
         # add a dummy token
-        assert self.config.pad_token_id is not None, "The PAD token should be defined for generation"
+        if self.config.pad_token_id is None:
+            raise ValueError("The PAD token should be defined for generation")
         attention_mask = torch.cat([attention_mask, attention_mask.new_zeros((attention_mask.shape[0], 1))], dim=-1)
         dummy_token = torch.full(
             (effective_batch_size, 1), self.config.pad_token_id, dtype=torch.long, device=input_ids.device
@@ -389,9 +389,10 @@ class FlaxBigBirdBlockSparseAttention(nn.Module):
     def create_masks_for_block_sparse_attn(attention_mask, block_size: int):
         batch_size, seq_length = attention_mask.shape
-        assert (
-            seq_length % block_size == 0
-        ), f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}."
+        if seq_length % block_size != 0:
+            raise ValueError(
+                f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}."
+            )
         def create_band_mask_from_inputs(from_blocked_mask, to_blocked_mask):
             """
@@ -464,8 +465,12 @@ class FlaxBigBirdBlockSparseAttention(nn.Module):
         to_seq_len = key_layer.shape[2]
         from_block_size = to_block_size = self.config.block_size
-        assert from_seq_len % from_block_size == 0, "Query sided sequence length must be multiple of block size"
-        assert to_seq_len % to_block_size == 0, "Key/Value sided sequence length must be multiple of block size"
+        if from_seq_len % from_block_size != 0:
+            raise ValueError("Query sided sequence length must be multiple of block size")
+        if to_seq_len % to_block_size != 0:
+            raise ValueError("Key/Value sided sequence length must be multiple of block size")
+        if from_seq_len // from_block_size != to_seq_len // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
@@ -863,9 +868,8 @@ class FlaxBigBirdBlockSparseAttention(nn.Module):
         """
         # using this method when from_seq_length in [1024, 3072, 4096]
-        assert (
-            from_seq_length // from_block_size == to_seq_length // to_block_size
-        ), "Error the number of blocks needs to be same!"
+        if from_seq_length // from_block_size != to_seq_length // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
         rand_attn = np.zeros((from_seq_length // from_block_size - 2, num_rand_blocks), dtype=np.int32)
         middle_seq = np.arange(1, to_seq_length // to_block_size - 1, dtype=np.int32)
@@ -939,11 +943,11 @@ class FlaxBigBirdBlockSparseAttention(nn.Module):
         """
         # using this method when from_seq_length not in [1024, 3072, 4096]
-        assert (
-            from_seq_length // from_block_size == to_seq_length // to_block_size
-        ), "Error the number of blocks needs to be same!"
+        if from_seq_length // from_block_size != to_seq_length // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
-        assert from_seq_length in plan_from_length, "Error from sequence length not in plan!"
+        if from_seq_length not in plan_from_length:
+            raise ValueError("Error from sequence length not in plan!")
         # Total number of blocks in the mmask
         num_blocks = from_seq_length // from_block_size
@@ -83,7 +83,8 @@ def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start
     shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
     shifted_input_ids[:, 0] = decoder_start_token_id
-    assert pad_token_id is not None, "self.model.config.pad_token_id has to be defined."
+    if pad_token_id is None:
+        raise ValueError("self.model.config.pad_token_id has to be defined.")
     # replace possible -100 values in labels by `pad_token_id`
     shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
@@ -287,8 +288,11 @@ class BigBirdPegasusBlockSparseAttention(nn.Module):
         to_seq_length = from_seq_length = seqlen
         from_block_size = to_block_size = self.block_size
-        assert from_seq_length % from_block_size == 0, "Query sided sequence length must be multiple of block size"
-        assert to_seq_length % to_block_size == 0, "Key/Value sided sequence length must be multiple of block size"
+        if from_seq_length % from_block_size != 0:
+            raise ValueError("Query sided sequence length must be multiple of block size")
+        if to_seq_length % to_block_size != 0:
+            raise ValueError("Key/Value sided sequence length must be multiple of block size")
         query_layer = self.transpose_for_scores(self.query(hidden_states))
         key_layer = self.transpose_for_scores(self.key(hidden_states))
@@ -904,9 +908,8 @@ class BigBirdPegasusBlockSparseAttention(nn.Module):
         """
         # using this method when from_seq_length in [1024, 3072, 4096]
-        assert (
-            from_seq_length // from_block_size == to_seq_length // to_block_size
-        ), "Error the number of blocks needs to be same!"
+        if from_seq_length // from_block_size != to_seq_length // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
         rand_attn = np.zeros((from_seq_length // from_block_size - 2, num_rand_blocks), dtype=np.int32)
         middle_seq = np.arange(1, to_seq_length // to_block_size - 1, dtype=np.int32)
@@ -980,11 +983,11 @@ class BigBirdPegasusBlockSparseAttention(nn.Module):
         """
         # using this method when from_seq_length not in [1024, 3072, 4096]
-        assert (
-            from_seq_length // from_block_size == to_seq_length // to_block_size
-        ), "Error the number of blocks needs to be same!"
+        if from_seq_length // from_block_size != to_seq_length // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
-        assert from_seq_length in plan_from_length, "Error from sequence length not in plan!"
+        if from_seq_length not in plan_from_length:
+            raise ValueError("Error from sequence length not in plan!")
         # Total number of blocks in the mmask
         num_blocks = from_seq_length // from_block_size
@@ -1914,9 +1917,10 @@ class BigBirdPegasusEncoder(BigBirdPegasusPreTrainedModel):
         # check if head_mask has a correct number of layers specified if desired
         if head_mask is not None:
-            assert head_mask.size()[0] == (
-                len(self.layers)
-            ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            if head_mask.size()[0] != len(self.layers):
+                raise ValueError(
+                    f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                )
         for idx, encoder_layer in enumerate(self.layers):
             if output_hidden_states:
@@ -1997,9 +2001,10 @@ class BigBirdPegasusEncoder(BigBirdPegasusPreTrainedModel):
     def create_masks_for_block_sparse_attn(attention_mask: torch.Tensor, block_size: int):
         batch_size, seq_length = attention_mask.size()
-        assert (
-            seq_length % block_size == 0
-        ), f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}."
+        if seq_length % block_size != 0:
+            raise ValueError(
+                f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}."
+            )
         def create_band_mask_from_inputs(from_blocked_mask, to_blocked_mask):
             """
@@ -2242,9 +2247,10 @@ class BigBirdPegasusDecoder(BigBirdPegasusPreTrainedModel):
         # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired
         for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]):
             if attn_mask is not None:
-                assert attn_mask.size()[0] == (
-                    len(self.layers)
-                ), f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                if attn_mask.size()[0] != len(self.layers):
+                    raise ValueError(
+                        f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                    )
         for idx, decoder_layer in enumerate(self.layers):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
@@ -747,9 +747,10 @@ class BlenderbotEncoder(BlenderbotPreTrainedModel):
         # check if head_mask has a correct number of layers specified if desired
         if head_mask is not None:
-            assert head_mask.size()[0] == (
-                len(self.layers)
-            ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            if head_mask.size()[0] != len(self.layers):
+                raise ValueError(
+                    f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                )
         for idx, encoder_layer in enumerate(self.layers):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
@@ -986,9 +987,10 @@ class BlenderbotDecoder(BlenderbotPreTrainedModel):
         # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired
         for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]):
             if attn_mask is not None:
-                assert attn_mask.size()[0] == (
-                    len(self.layers)
-                ), f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                if attn_mask.size()[0] != len(self.layers):
+                    raise ValueError(
+                        f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                    )
         for idx, decoder_layer in enumerate(self.layers):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
@@ -746,9 +746,10 @@ class BlenderbotSmallEncoder(BlenderbotSmallPreTrainedModel):
         # check if head_mask has a correct number of layers specified if desired
         if head_mask is not None:
-            assert head_mask.size()[0] == (
-                len(self.layers)
-            ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            if head_mask.size()[0] != len(self.layers):
+                raise ValueError(
+                    f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                )
         for idx, encoder_layer in enumerate(self.layers):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
@@ -983,9 +984,10 @@ class BlenderbotSmallDecoder(BlenderbotSmallPreTrainedModel):
         # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired
         for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]):
             if attn_mask is not None:
-                assert attn_mask.size()[0] == (
-                    len(self.layers)
-                ), f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                if attn_mask.size()[0] != len(self.layers):
+                    raise ValueError(
+                        f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                    )
         for idx, decoder_layer in enumerate(self.layers):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
@@ -184,13 +184,10 @@ def load_tf_weights_in_canine(model, config, tf_checkpoint_path):
             pointer = getattr(pointer, "weight")
         elif m_name == "kernel":
             array = np.transpose(array)
-        try:
-            assert (
-                pointer.shape == array.shape
-            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
-        except AssertionError as e:
-            e.args += (pointer.shape, array.shape)
-            raise
+        if pointer.shape != array.shape:
+            raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")
         logger.info(f"Initialize PyTorch weight {name}")
         pointer.data = torch.from_numpy(array)
     return model
@@ -179,9 +179,10 @@ class CLIPAttention(nn.Module):
         self.embed_dim = config.hidden_size
         self.num_heads = config.num_attention_heads
         self.head_dim = self.embed_dim // self.num_heads
-        assert (
-            self.head_dim * self.num_heads == self.embed_dim
-        ), f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
+        if self.head_dim * self.num_heads != self.embed_dim:
+            raise ValueError(
+                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
+            )
         self.scale = self.head_dim**-0.5
         self.dropout = config.attention_dropout
@@ -259,9 +259,10 @@ class FlaxCLIPAttention(nn.Module):
         self.embed_dim = self.config.hidden_size
         self.num_heads = self.config.num_attention_heads
         self.head_dim = self.embed_dim // self.num_heads
-        assert (
-            self.head_dim * self.num_heads == self.embed_dim
-        ), f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
+        if self.head_dim * self.num_heads != self.embed_dim:
+            raise ValueError(
+                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
+            )
         self.scale = self.head_dim**-0.5
         self.dropout = self.config.attention_dropout
@@ -317,9 +317,8 @@ class ConvBertSelfAttention(nn.Module):
         self.head_ratio = config.head_ratio
         self.conv_kernel_size = config.conv_kernel_size
-        assert (
-            config.hidden_size % self.num_attention_heads == 0
-        ), "hidden_size should be divisible by num_attention_heads"
+        if config.hidden_size % self.num_attention_heads != 0:
+            raise ValueError("hidden_size should be divisible by num_attention_heads")
         self.attention_head_size = config.hidden_size // config.num_attention_heads
         self.all_head_size = self.num_attention_heads * self.attention_head_size
@@ -554,7 +553,8 @@ class ConvBertLayer(nn.Module):
         self.is_decoder = config.is_decoder
         self.add_cross_attention = config.add_cross_attention
         if self.add_cross_attention:
-            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
+            if not self.is_decoder:
+                raise TypeError(f"{self} should be used as a decoder model if cross attention is added")
             self.crossattention = ConvBertAttention(config)
         self.intermediate = ConvBertIntermediate(config)
         self.output = ConvBertOutput(config)
@@ -578,9 +578,10 @@ class ConvBertLayer(nn.Module):
         outputs = self_attention_outputs[1:]  # add self attentions if we output attention weights
         if self.is_decoder and encoder_hidden_states is not None:
-            assert hasattr(
-                self, "crossattention"
-            ), f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+            if not hasattr(self, "crossattention"):
+                raise AttributeError(
+                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+                )
             cross_attention_outputs = self.crossattention(
                 attention_output,
                 encoder_attention_mask,
@@ -168,9 +168,8 @@ class TFConvBertSelfAttention(tf.keras.layers.Layer):
         self.num_attention_heads = num_attention_heads
         self.conv_kernel_size = config.conv_kernel_size
-        assert (
-            config.hidden_size % self.num_attention_heads == 0
-        ), "hidden_size should be divisible by num_attention_heads"
+        if config.hidden_size % self.num_attention_heads != 0:
+            raise ValueError("hidden_size should be divisible by num_attention_heads")
         self.attention_head_size = config.hidden_size // config.num_attention_heads
         self.all_head_size = self.num_attention_heads * self.attention_head_size
@@ -404,7 +404,8 @@ class CTRLModel(CTRLPreTrainedModel):
         # Attention mask.
         if attention_mask is not None:
-            assert batch_size > 0, "batch_size has to be defined and > 0"
+            if batch_size <= 0:
+                raise ValueError("batch_size has to be defined and > 0")
             attention_mask = attention_mask.view(batch_size, -1)
             # We create a 3D attention mask from a 2D tensor mask.
             # Sizes are [batch_size, 1, 1, to_seq_length]
@@ -669,9 +670,8 @@ class CTRLForSequenceClassification(CTRLPreTrainedModel):
         else:
             batch_size, sequence_length = inputs_embeds.shape[:2]
-        assert (
-            self.config.pad_token_id is not None or batch_size == 1
-        ), "Cannot handle batch sizes > 1 if no padding token is defined."
+        if self.config.pad_token_id is None and batch_size != 1:
+            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             sequence_lengths = -1
@@ -816,9 +816,8 @@ class TFCTRLForSequenceClassification(TFCTRLPreTrainedModel, TFSequenceClassific
             batch_size, sequence_length = shape_list(input_ids)[:2]
         else:
             batch_size, sequence_length = shape_list(inputs_embeds)[:2]
-        assert (
-            self.config.pad_token_id is not None or batch_size == 1
-        ), "Cannot handle batch sizes > 1 if no padding token is defined."
+        if self.config.pad_token_id is None and batch_size != 1:
+            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if not tf.is_tensor(sequence_lengths):
             in_logits = logits[0:batch_size, sequence_lengths]
@@ -292,7 +292,8 @@ class SPMTokenizer:
         self.vocab_file = vocab_file
         self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
         spm = sp.SentencePieceProcessor(**self.sp_model_kwargs)
-        assert os.path.exists(vocab_file)
+        if not os.path.exists(vocab_file):
+            raise FileNotFoundError(f"{vocab_file} does not exist!")
         spm.load(vocab_file)
         bpe_vocab_size = spm.GetPieceSize()
         # Token map
@@ -412,7 +412,8 @@ class DetrSinePositionEmbedding(nn.Module):
         self.scale = scale
     def forward(self, pixel_values, pixel_mask):
-        assert pixel_mask is not None, "No pixel mask provided"
+        if pixel_mask is None:
+            raise ValueError("No pixel mask provided")
         y_embed = pixel_mask.cumsum(1, dtype=torch.float32)
         x_embed = pixel_mask.cumsum(2, dtype=torch.float32)
         if self.normalize:
@@ -486,9 +487,10 @@ class DetrAttention(nn.Module):
         self.num_heads = num_heads
         self.dropout = dropout
         self.head_dim = embed_dim // num_heads
-        assert (
-            self.head_dim * num_heads == self.embed_dim
-        ), f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {num_heads})."
+        if self.head_dim * num_heads != self.embed_dim:
+            raise ValueError(
+                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {num_heads})."
+            )
         self.scaling = self.head_dim**-0.5
         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
@@ -1254,7 +1256,8 @@ class DetrModel(DetrPreTrainedModel):
         # get final feature map and downsampled mask
         feature_map, mask = features[-1]
-        assert mask is not None, "Backbone does not return downsampled pixel mask"
+        if mask is None:
+            raise ValueError("Backbone does not return downsampled pixel mask")
         # Second, apply 1x1 convolution to reduce the channel dimension to d_model (256 by default)
         projected_feature_map = self.input_projection(feature_map)
@@ -1709,9 +1712,10 @@ class DetrMaskHeadSmallConv(nn.Module):
     def __init__(self, dim, fpn_dims, context_dim):
         super().__init__()
-        assert (
-            dim % 8 == 0
-        ), "The hidden_size + number of attention heads must be divisible by 8 as the number of groups in GroupNorm is set to 8"
+        if dim % 8 != 0:
+            raise ValueError(
+                "The hidden_size + number of attention heads must be divisible by 8 as the number of groups in GroupNorm is set to 8"
+            )
         inter_dims = [dim, context_dim // 2, context_dim // 4, context_dim // 8, context_dim // 16, context_dim // 64]
@@ -1897,7 +1901,8 @@ class DetrLoss(nn.Module):
         Classification loss (NLL) targets dicts must contain the key "class_labels" containing a tensor of dim
         [nb_target_boxes]
         """
-        assert "logits" in outputs, "No logits were found in the outputs"
+        if "logits" not in outputs:
+            raise KeyError("No logits were found in the outputs")
         src_logits = outputs["logits"]
         idx = self._get_src_permutation_idx(indices)
@@ -1935,7 +1940,8 @@ class DetrLoss(nn.Module):
         Targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]. The target boxes
         are expected in format (center_x, center_y, w, h), normalized by the image size.
         """
-        assert "pred_boxes" in outputs, "No predicted boxes found in outputs"
+        if "pred_boxes" not in outputs:
+            raise KeyError("No predicted boxes found in outputs")
         idx = self._get_src_permutation_idx(indices)
         src_boxes = outputs["pred_boxes"][idx]
         target_boxes = torch.cat([t["boxes"][i] for t, (_, i) in zip(targets, indices)], dim=0)
@@ -1957,7 +1963,8 @@ class DetrLoss(nn.Module):
         Targets dicts must contain the key "masks" containing a tensor of dim [nb_target_boxes, h, w].
         """
-        assert "pred_masks" in outputs, "No predicted masks found in outputs"
+        if "pred_masks" not in outputs:
+            raise KeyError("No predicted masks found in outputs")
         src_idx = self._get_src_permutation_idx(indices)
         tgt_idx = self._get_tgt_permutation_idx(indices)
@@ -2002,7 +2009,8 @@ class DetrLoss(nn.Module):
             "boxes": self.loss_boxes,
             "masks": self.loss_masks,
         }
-        assert loss in loss_map, f"Loss {loss} not supported"
+        if loss not in loss_map:
+            raise ValueError(f"Loss {loss} not supported")
         return loss_map[loss](outputs, targets, indices, num_boxes)
     def forward(self, outputs, targets):
@@ -2097,7 +2105,8 @@ class DetrHungarianMatcher(nn.Module):
         self.class_cost = class_cost
         self.bbox_cost = bbox_cost
         self.giou_cost = giou_cost
-        assert class_cost != 0 or bbox_cost != 0 or giou_cost != 0, "All costs of the Matcher can't be 0"
+        if class_cost == 0 and bbox_cost == 0 and giou_cost == 0:
+            raise ValueError("All costs of the Matcher can't be 0")
     @torch.no_grad()
     def forward(self, outputs, targets):
@@ -176,7 +176,8 @@ class DPREncoder(DPRPreTrainedModel):
     def __init__(self, config: DPRConfig):
         super().__init__(config)
         self.bert_model = BertModel(config, add_pooling_layer=False)
-        assert self.bert_model.config.hidden_size > 0, "Encoder hidden_size can't be zero"
+        if self.bert_model.config.hidden_size <= 0:
+            raise ValueError("Encoder hidden_size can't be zero")
         self.projection_dim = config.projection_dim
         if self.projection_dim > 0:
             self.encode_proj = nn.Linear(self.bert_model.config.hidden_size, config.projection_dim)
@@ -156,7 +156,8 @@ class TFDPREncoderLayer(tf.keras.layers.Layer):
         self.bert_model = TFBertMainLayer(config, add_pooling_layer=False, name="bert_model")
         self.config = config
-        assert self.config.hidden_size > 0, "Encoder hidden_size can't be zero"
+        if self.config.hidden_size <= 0:
+            raise ValueError("Encoder hidden_size can't be zero")
         self.projection_dim = config.projection_dim
         if self.projection_dim > 0:
             self.encode_proj = tf.keras.layers.Dense(
@@ -234,9 +234,10 @@ class CustomDPRReaderTokenizerMixin:
         texts = texts if not isinstance(texts, str) else [texts]
         n_passages = len(titles)
         questions = questions if not isinstance(questions, str) else [questions] * n_passages
-        assert len(titles) == len(
-            texts
-        ), f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts."
+        if len(titles) != len(texts):
+            raise ValueError(
+                f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts."
+            )
         encoded_question_and_titles = super().__call__(questions, titles, padding=False, truncation=False)["input_ids"]
         encoded_texts = super().__call__(texts, add_special_tokens=False, padding=False, truncation=False)["input_ids"]
         encoded_inputs = {
@@ -347,9 +348,11 @@ class CustomDPRReaderTokenizerMixin:
         scores = sorted(scores, key=lambda x: x[1], reverse=True)
         chosen_span_intervals = []
         for (start_index, end_index), score in scores:
-            assert start_index <= end_index, f"Wrong span indices: [{start_index}:{end_index}]"
+            if start_index > end_index:
+                raise ValueError(f"Wrong span indices: [{start_index}:{end_index}]")
             length = end_index - start_index + 1
-            assert length <= max_answer_length, f"Span is too long: {length} > {max_answer_length}"
+            if length > max_answer_length:
+                raise ValueError(f"Span is too long: {length} > {max_answer_length}")
             if any(
                 [
                     start_index <= prev_start_index <= prev_end_index <= end_index
@@ -115,13 +115,9 @@ def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path):
             # if vocab is padded, then trim off the padding embeddings
             array = array[: config.vocab_size]
-        try:
-            assert (
-                pointer.shape == array.shape
-            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched {name}"
-        except AssertionError as e:
-            e.args += (pointer.shape, array.shape)
-            raise
+        if pointer.shape != array.shape:
+            raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched {name}")
         print(f"Initialize PyTorch weight {name}")
         pointer.data = torch.from_numpy(array)
@@ -552,7 +548,8 @@ class GPTNeoModel(GPTNeoPreTrainedModel):
         # Attention mask.
         if attention_mask is not None:
-            assert batch_size > 0, "batch_size has to be defined and > 0"
+            if batch_size <= 0:
+                raise ValueError("batch_size has to be defined and > 0")
             attention_mask = attention_mask.view(batch_size, -1)
             # We create a 3D attention mask from a 2D tensor mask.
             # Sizes are [batch_size, 1, 1, to_seq_length]
@@ -875,9 +872,8 @@ class GPTNeoForSequenceClassification(GPTNeoPreTrainedModel):
         else:
             batch_size, sequence_length = inputs_embeds.shape[:2]
-        assert (
-            self.config.pad_token_id is not None or batch_size == 1
-        ), "Cannot handle batch sizes > 1 if no padding token is defined."
+        if self.config.pad_token_id is None and batch_size != 1:
+            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             sequence_lengths = -1
         else:
@@ -573,7 +573,8 @@ class GPTJModel(GPTJPreTrainedModel):
         # Attention mask.
         if attention_mask is not None:
-            assert batch_size > 0, "batch_size has to be defined and > 0"
+            if batch_size <= 0:
+                raise ValueError("batch_size has to be defined and > 0")
             attention_mask = attention_mask.view(batch_size, -1)
             # We create a 3D attention mask from a 2D tensor mask.
             # Sizes are [batch_size, 1, 1, to_seq_length]
@@ -939,9 +940,8 @@ class GPTJForSequenceClassification(GPTJPreTrainedModel):
         else:
             batch_size = inputs_embeds.shape[0]
-        assert (
-            self.config.pad_token_id is not None or batch_size == 1
-        ), "Cannot handle batch sizes > 1 if no padding token is defined."
+        if self.config.pad_token_id is None and batch_size != 1:
+            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             sequence_lengths = -1
         else:
@@ -934,9 +934,8 @@ class TFGPTJForSequenceClassification(TFGPTJPreTrainedModel, TFSequenceClassific
         loss = None
         if labels is not None:
-            assert (
-                self.config.pad_token_id is not None or logits_shape[0] == 1
-            ), "Cannot handle batch sizes > 1 if no padding token is defined."
+            if self.config.pad_token_id is None and logits_shape[0] != 1:
+                raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
             if not tf.is_tensor(sequence_lengths):
                 in_logits = logits[0 : logits_shape[0], sequence_lengths]