Unverified Commit cc034f72 authored by Anmol Joshi, committed by GitHub

Replace assertion with exception (#16720)



* Updated assertions to exceptions

* updated assertions to exceptions

* bug fixes

* fix-copies

* Update modeling_ctrl.py

* Update src/transformers/models/ctrl/modeling_tf_ctrl.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/gpt_neo/modeling_gpt_neo.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/gptj/modeling_gptj.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/models/gptj/modeling_tf_gptj.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update modeling_led.py

* Update modeling_led.py

* Update modeling_led.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 14daa610
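
Every hunk below applies the same transformation: a bare `assert` on user-provided values is replaced with an explicit `if` check that raises a specific exception (`ValueError` for invalid values, `TypeError` for wrong model usage, `KeyError`/`AttributeError` for missing entries, `FileNotFoundError` for a missing file). Unlike assertions, these checks still run when Python is started with the `-O` flag, and the exception type tells callers what went wrong. A minimal sketch of the pattern (function names and values are illustrative, not from the diff):

def check_block_size_with_assert(seq_length: int, block_size: int) -> None:
    # Old style: skipped entirely under `python -O`, and only ever raises AssertionError.
    assert seq_length % block_size == 0, "Sequence length must be multiple of block size"


def check_block_size_with_exception(seq_length: int, block_size: int) -> None:
    # New style: always executed, and the exception type documents the failure mode.
    if seq_length % block_size != 0:
        raise ValueError(
            f"Sequence length must be multiple of block size, but sequence length is {seq_length}, "
            f"while block size is {block_size}."
        )
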
@@ -145,7 +145,8 @@ def load_tf_weights_in_big_bird(model, tf_checkpoint_path, is_trivia_qa=False):
     # Load weights from TF model
     init_vars = tf.saved_model.load(tf_path).variables if is_trivia_qa else tf.train.list_variables(tf_path)
-    assert len(init_vars) > 0, "Loaded trained variables cannot be empty."
+    if len(init_vars) <= 0:
+        raise ValueError("Loaded trained variables cannot be empty.")
     pt_names = list(model.state_dict().keys())
@@ -460,8 +461,11 @@ class BigBirdBlockSparseAttention(nn.Module):
         to_seq_length = from_seq_length = seqlen
         from_block_size = to_block_size = self.block_size
-        assert from_seq_length % from_block_size == 0, "Query sided sequence length must be multiple of block size"
-        assert to_seq_length % to_block_size == 0, "Key/Value sided sequence length must be multiple of block size"
+        if from_seq_length % from_block_size != 0:
+            raise ValueError("Query sided sequence length must be multiple of block size")
+        if to_seq_length % to_block_size != 0:
+            raise ValueError("Key/Value sided sequence length must be multiple of block size")
         query_layer = self.transpose_for_scores(self.query(hidden_states))
         key_layer = self.transpose_for_scores(self.key(hidden_states))
@@ -1077,9 +1081,8 @@ class BigBirdBlockSparseAttention(nn.Module):
         """
         # using this method when from_seq_length in [1024, 3072, 4096]
-        assert (
-            from_seq_length // from_block_size == to_seq_length // to_block_size
-        ), "Error the number of blocks needs to be same!"
+        if from_seq_length // from_block_size != to_seq_length // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
         rand_attn = np.zeros((from_seq_length // from_block_size - 2, num_rand_blocks), dtype=np.int32)
         middle_seq = np.arange(1, to_seq_length // to_block_size - 1, dtype=np.int32)
@@ -1153,11 +1156,11 @@ class BigBirdBlockSparseAttention(nn.Module):
         """
         # using this method when from_seq_length not in [1024, 3072, 4096]
-        assert (
-            from_seq_length // from_block_size == to_seq_length // to_block_size
-        ), "Error the number of blocks needs to be same!"
+        if from_seq_length // from_block_size != to_seq_length // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
-        assert from_seq_length in plan_from_length, "Error from sequence length not in plan!"
+        if from_seq_length not in plan_from_length:
+            raise ValueError("Error from sequence length not in plan!")
         # Total number of blocks in the mmask
         num_blocks = from_seq_length // from_block_size
@@ -1397,9 +1400,8 @@ class BigBirdAttention(nn.Module):
                 output_attentions,
             )
         else:
-            assert (
-                encoder_hidden_states is None
-            ), "BigBird cannot be used as a decoder when config.attention_type != 'original_full'"
+            if encoder_hidden_states is not None:
+                raise ValueError("BigBird cannot be used as a decoder when config.attention_type != 'original_full'")
             self_outputs = self.self(
                 hidden_states, band_mask, from_mask, to_mask, from_blocked_mask, to_blocked_mask, output_attentions
             )
@@ -1451,7 +1453,8 @@ class BigBirdLayer(nn.Module):
         self.is_decoder = config.is_decoder
         self.add_cross_attention = config.add_cross_attention
         if self.add_cross_attention:
-            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
+            if not self.is_decoder:
+                raise TypeError(f"{self} should be used as a decoder model if cross attention is added")
             self.crossattention = BigBirdAttention(config)
         self.intermediate = BigBirdIntermediate(config)
         self.output = BigBirdOutput(config)
@@ -2183,9 +2186,10 @@ class BigBirdModel(BigBirdPreTrainedModel):
     def create_masks_for_block_sparse_attn(attention_mask: torch.Tensor, block_size: int):
         batch_size, seq_length = attention_mask.size()
-        assert (
-            seq_length % block_size == 0
-        ), f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}."
+        if seq_length % block_size != 0:
+            raise ValueError(
+                f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}."
+            )
         def create_band_mask_from_inputs(from_blocked_mask, to_blocked_mask):
             """
......
@@ -389,9 +389,10 @@ class FlaxBigBirdBlockSparseAttention(nn.Module):
     def create_masks_for_block_sparse_attn(attention_mask, block_size: int):
         batch_size, seq_length = attention_mask.shape
-        assert (
-            seq_length % block_size == 0
-        ), f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}."
+        if seq_length % block_size != 0:
+            raise ValueError(
+                f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}."
+            )
         def create_band_mask_from_inputs(from_blocked_mask, to_blocked_mask):
             """
@@ -464,8 +465,12 @@ class FlaxBigBirdBlockSparseAttention(nn.Module):
         to_seq_len = key_layer.shape[2]
         from_block_size = to_block_size = self.config.block_size
-        assert from_seq_len % from_block_size == 0, "Query sided sequence length must be multiple of block size"
-        assert to_seq_len % to_block_size == 0, "Key/Value sided sequence length must be multiple of block size"
+        if from_seq_len % from_block_size != 0:
+            raise ValueError("Query sided sequence length must be multiple of block size")
+        if to_seq_len % to_block_size != 0:
+            raise ValueError("Key/Value sided sequence length must be multiple of block size")
         if from_seq_len // from_block_size != to_seq_len // to_block_size:
             raise ValueError("Error the number of blocks needs to be same!")
@@ -863,9 +868,8 @@ class FlaxBigBirdBlockSparseAttention(nn.Module):
         """
         # using this method when from_seq_length in [1024, 3072, 4096]
-        assert (
-            from_seq_length // from_block_size == to_seq_length // to_block_size
-        ), "Error the number of blocks needs to be same!"
+        if from_seq_length // from_block_size != to_seq_length // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
         rand_attn = np.zeros((from_seq_length // from_block_size - 2, num_rand_blocks), dtype=np.int32)
         middle_seq = np.arange(1, to_seq_length // to_block_size - 1, dtype=np.int32)
@@ -939,11 +943,11 @@ class FlaxBigBirdBlockSparseAttention(nn.Module):
         """
         # using this method when from_seq_length not in [1024, 3072, 4096]
-        assert (
-            from_seq_length // from_block_size == to_seq_length // to_block_size
-        ), "Error the number of blocks needs to be same!"
+        if from_seq_length // from_block_size != to_seq_length // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
-        assert from_seq_length in plan_from_length, "Error from sequence length not in plan!"
+        if from_seq_length not in plan_from_length:
+            raise ValueError("Error from sequence length not in plan!")
         # Total number of blocks in the mmask
         num_blocks = from_seq_length // from_block_size
......
@@ -83,7 +83,8 @@ def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start
     shifted_input_ids[:, 1:] = input_ids[:, :-1].clone()
     shifted_input_ids[:, 0] = decoder_start_token_id
-    assert pad_token_id is not None, "self.model.config.pad_token_id has to be defined."
+    if pad_token_id is None:
+        raise ValueError("self.model.config.pad_token_id has to be defined.")
     # replace possible -100 values in labels by `pad_token_id`
     shifted_input_ids.masked_fill_(shifted_input_ids == -100, pad_token_id)
@@ -287,8 +288,11 @@ class BigBirdPegasusBlockSparseAttention(nn.Module):
         to_seq_length = from_seq_length = seqlen
         from_block_size = to_block_size = self.block_size
-        assert from_seq_length % from_block_size == 0, "Query sided sequence length must be multiple of block size"
-        assert to_seq_length % to_block_size == 0, "Key/Value sided sequence length must be multiple of block size"
+        if from_seq_length % from_block_size != 0:
+            raise ValueError("Query sided sequence length must be multiple of block size")
+        if to_seq_length % to_block_size != 0:
+            raise ValueError("Key/Value sided sequence length must be multiple of block size")
         query_layer = self.transpose_for_scores(self.query(hidden_states))
         key_layer = self.transpose_for_scores(self.key(hidden_states))
@@ -904,9 +908,8 @@ class BigBirdPegasusBlockSparseAttention(nn.Module):
         """
         # using this method when from_seq_length in [1024, 3072, 4096]
-        assert (
-            from_seq_length // from_block_size == to_seq_length // to_block_size
-        ), "Error the number of blocks needs to be same!"
+        if from_seq_length // from_block_size != to_seq_length // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
         rand_attn = np.zeros((from_seq_length // from_block_size - 2, num_rand_blocks), dtype=np.int32)
         middle_seq = np.arange(1, to_seq_length // to_block_size - 1, dtype=np.int32)
@@ -980,11 +983,11 @@ class BigBirdPegasusBlockSparseAttention(nn.Module):
         """
         # using this method when from_seq_length not in [1024, 3072, 4096]
-        assert (
-            from_seq_length // from_block_size == to_seq_length // to_block_size
-        ), "Error the number of blocks needs to be same!"
+        if from_seq_length // from_block_size != to_seq_length // to_block_size:
+            raise ValueError("Error the number of blocks needs to be same!")
-        assert from_seq_length in plan_from_length, "Error from sequence length not in plan!"
+        if from_seq_length not in plan_from_length:
+            raise ValueError("Error from sequence length not in plan!")
         # Total number of blocks in the mmask
         num_blocks = from_seq_length // from_block_size
@@ -1914,9 +1917,10 @@ class BigBirdPegasusEncoder(BigBirdPegasusPreTrainedModel):
         # check if head_mask has a correct number of layers specified if desired
         if head_mask is not None:
-            assert head_mask.size()[0] == (
-                len(self.layers)
-            ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            if head_mask.size()[0] != len(self.layers):
+                raise ValueError(
+                    f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                )
         for idx, encoder_layer in enumerate(self.layers):
             if output_hidden_states:
@@ -1997,9 +2001,10 @@ class BigBirdPegasusEncoder(BigBirdPegasusPreTrainedModel):
     def create_masks_for_block_sparse_attn(attention_mask: torch.Tensor, block_size: int):
         batch_size, seq_length = attention_mask.size()
-        assert (
-            seq_length % block_size == 0
-        ), f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}."
+        if seq_length % block_size != 0:
+            raise ValueError(
+                f"Sequence length must be multiple of block size, but sequence length is {seq_length}, while block size is {block_size}."
+            )
         def create_band_mask_from_inputs(from_blocked_mask, to_blocked_mask):
             """
@@ -2242,9 +2247,10 @@ class BigBirdPegasusDecoder(BigBirdPegasusPreTrainedModel):
         # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired
         for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]):
             if attn_mask is not None:
-                assert attn_mask.size()[0] == (
-                    len(self.layers)
-                ), f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                if attn_mask.size()[0] != len(self.layers):
+                    raise ValueError(
+                        f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                    )
         for idx, decoder_layer in enumerate(self.layers):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
......
@@ -747,9 +747,10 @@ class BlenderbotEncoder(BlenderbotPreTrainedModel):
         # check if head_mask has a correct number of layers specified if desired
         if head_mask is not None:
-            assert head_mask.size()[0] == (
-                len(self.layers)
-            ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            if head_mask.size()[0] != len(self.layers):
+                raise ValueError(
+                    f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                )
         for idx, encoder_layer in enumerate(self.layers):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
@@ -986,9 +987,10 @@ class BlenderbotDecoder(BlenderbotPreTrainedModel):
         # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired
         for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]):
             if attn_mask is not None:
-                assert attn_mask.size()[0] == (
-                    len(self.layers)
-                ), f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                if attn_mask.size()[0] != len(self.layers):
+                    raise ValueError(
+                        f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                    )
         for idx, decoder_layer in enumerate(self.layers):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
......
@@ -746,9 +746,10 @@ class BlenderbotSmallEncoder(BlenderbotSmallPreTrainedModel):
         # check if head_mask has a correct number of layers specified if desired
         if head_mask is not None:
-            assert head_mask.size()[0] == (
-                len(self.layers)
-            ), f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+            if head_mask.size()[0] != len(self.layers):
+                raise ValueError(
+                    f"The head_mask should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                )
         for idx, encoder_layer in enumerate(self.layers):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
@@ -983,9 +984,10 @@ class BlenderbotSmallDecoder(BlenderbotSmallPreTrainedModel):
         # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired
         for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]):
             if attn_mask is not None:
-                assert attn_mask.size()[0] == (
-                    len(self.layers)
-                ), f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                if attn_mask.size()[0] != len(self.layers):
+                    raise ValueError(
+                        f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for {head_mask.size()[0]}."
+                    )
         for idx, decoder_layer in enumerate(self.layers):
             # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
             if output_hidden_states:
......
@@ -184,13 +184,10 @@ def load_tf_weights_in_canine(model, config, tf_checkpoint_path):
             pointer = getattr(pointer, "weight")
         elif m_name == "kernel":
             array = np.transpose(array)
-        try:
-            assert (
-                pointer.shape == array.shape
-            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
-        except AssertionError as e:
-            e.args += (pointer.shape, array.shape)
-            raise
+        if pointer.shape != array.shape:
+            raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched")
         logger.info(f"Initialize PyTorch weight {name}")
         pointer.data = torch.from_numpy(array)
     return model
......
@@ -179,9 +179,10 @@ class CLIPAttention(nn.Module):
         self.embed_dim = config.hidden_size
         self.num_heads = config.num_attention_heads
         self.head_dim = self.embed_dim // self.num_heads
-        assert (
-            self.head_dim * self.num_heads == self.embed_dim
-        ), f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
+        if self.head_dim * self.num_heads != self.embed_dim:
+            raise ValueError(
+                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
+            )
         self.scale = self.head_dim**-0.5
         self.dropout = config.attention_dropout
......
@@ -259,9 +259,10 @@ class FlaxCLIPAttention(nn.Module):
         self.embed_dim = self.config.hidden_size
         self.num_heads = self.config.num_attention_heads
         self.head_dim = self.embed_dim // self.num_heads
-        assert (
-            self.head_dim * self.num_heads == self.embed_dim
-        ), f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
+        if self.head_dim * self.num_heads != self.embed_dim:
+            raise ValueError(
+                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {self.num_heads})."
+            )
         self.scale = self.head_dim**-0.5
         self.dropout = self.config.attention_dropout
......
@@ -317,9 +317,8 @@ class ConvBertSelfAttention(nn.Module):
         self.head_ratio = config.head_ratio
         self.conv_kernel_size = config.conv_kernel_size
-        assert (
-            config.hidden_size % self.num_attention_heads == 0
-        ), "hidden_size should be divisible by num_attention_heads"
+        if config.hidden_size % self.num_attention_heads != 0:
+            raise ValueError("hidden_size should be divisible by num_attention_heads")
         self.attention_head_size = config.hidden_size // config.num_attention_heads
         self.all_head_size = self.num_attention_heads * self.attention_head_size
@@ -554,7 +553,8 @@ class ConvBertLayer(nn.Module):
         self.is_decoder = config.is_decoder
         self.add_cross_attention = config.add_cross_attention
         if self.add_cross_attention:
-            assert self.is_decoder, f"{self} should be used as a decoder model if cross attention is added"
+            if not self.is_decoder:
+                raise TypeError(f"{self} should be used as a decoder model if cross attention is added")
             self.crossattention = ConvBertAttention(config)
         self.intermediate = ConvBertIntermediate(config)
         self.output = ConvBertOutput(config)
@@ -578,9 +578,10 @@ class ConvBertLayer(nn.Module):
         outputs = self_attention_outputs[1:]  # add self attentions if we output attention weights
         if self.is_decoder and encoder_hidden_states is not None:
-            assert hasattr(
-                self, "crossattention"
-            ), f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+            if not hasattr(self, "crossattention"):
+                raise AttributeError(
+                    f"If `encoder_hidden_states` are passed, {self} has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`"
+                )
             cross_attention_outputs = self.crossattention(
                 attention_output,
                 encoder_attention_mask,
......
@@ -168,9 +168,8 @@ class TFConvBertSelfAttention(tf.keras.layers.Layer):
         self.num_attention_heads = num_attention_heads
         self.conv_kernel_size = config.conv_kernel_size
-        assert (
-            config.hidden_size % self.num_attention_heads == 0
-        ), "hidden_size should be divisible by num_attention_heads"
+        if config.hidden_size % self.num_attention_heads != 0:
+            raise ValueError("hidden_size should be divisible by num_attention_heads")
         self.attention_head_size = config.hidden_size // config.num_attention_heads
         self.all_head_size = self.num_attention_heads * self.attention_head_size
......
@@ -404,7 +404,8 @@ class CTRLModel(CTRLPreTrainedModel):
         # Attention mask.
         if attention_mask is not None:
-            assert batch_size > 0, "batch_size has to be defined and > 0"
+            if batch_size <= 0:
+                raise ValueError("batch_size has to be defined and > 0")
             attention_mask = attention_mask.view(batch_size, -1)
             # We create a 3D attention mask from a 2D tensor mask.
             # Sizes are [batch_size, 1, 1, to_seq_length]
@@ -669,9 +670,8 @@ class CTRLForSequenceClassification(CTRLPreTrainedModel):
         else:
             batch_size, sequence_length = inputs_embeds.shape[:2]
-        assert (
-            self.config.pad_token_id is not None or batch_size == 1
-        ), "Cannot handle batch sizes > 1 if no padding token is defined."
+        if self.config.pad_token_id is None and batch_size != 1:
+            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             sequence_lengths = -1
......
@@ -816,9 +816,8 @@ class TFCTRLForSequenceClassification(TFCTRLPreTrainedModel, TFSequenceClassific
             batch_size, sequence_length = shape_list(input_ids)[:2]
         else:
             batch_size, sequence_length = shape_list(inputs_embeds)[:2]
-        assert (
-            self.config.pad_token_id is not None or batch_size == 1
-        ), "Cannot handle batch sizes > 1 if no padding token is defined."
+        if self.config.pad_token_id is None and batch_size != 1:
+            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if not tf.is_tensor(sequence_lengths):
             in_logits = logits[0:batch_size, sequence_lengths]
......
@@ -292,7 +292,8 @@ class SPMTokenizer:
         self.vocab_file = vocab_file
         self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
         spm = sp.SentencePieceProcessor(**self.sp_model_kwargs)
-        assert os.path.exists(vocab_file)
+        if not os.path.exists(vocab_file):
+            raise FileNotFoundError(f"{vocab_file} does not exist!")
         spm.load(vocab_file)
         bpe_vocab_size = spm.GetPieceSize()
         # Token map
......
@@ -412,7 +412,8 @@ class DetrSinePositionEmbedding(nn.Module):
         self.scale = scale
     def forward(self, pixel_values, pixel_mask):
-        assert pixel_mask is not None, "No pixel mask provided"
+        if pixel_mask is None:
+            raise ValueError("No pixel mask provided")
         y_embed = pixel_mask.cumsum(1, dtype=torch.float32)
         x_embed = pixel_mask.cumsum(2, dtype=torch.float32)
         if self.normalize:
@@ -486,9 +487,10 @@ class DetrAttention(nn.Module):
         self.num_heads = num_heads
         self.dropout = dropout
         self.head_dim = embed_dim // num_heads
-        assert (
-            self.head_dim * num_heads == self.embed_dim
-        ), f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {num_heads})."
+        if self.head_dim * num_heads != self.embed_dim:
+            raise ValueError(
+                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`: {num_heads})."
+            )
         self.scaling = self.head_dim**-0.5
         self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
@@ -1254,7 +1256,8 @@ class DetrModel(DetrPreTrainedModel):
         # get final feature map and downsampled mask
         feature_map, mask = features[-1]
-        assert mask is not None, "Backbone does not return downsampled pixel mask"
+        if mask is None:
+            raise ValueError("Backbone does not return downsampled pixel mask")
         # Second, apply 1x1 convolution to reduce the channel dimension to d_model (256 by default)
         projected_feature_map = self.input_projection(feature_map)
@@ -1709,9 +1712,10 @@ class DetrMaskHeadSmallConv(nn.Module):
     def __init__(self, dim, fpn_dims, context_dim):
         super().__init__()
-        assert (
-            dim % 8 == 0
-        ), "The hidden_size + number of attention heads must be divisible by 8 as the number of groups in GroupNorm is set to 8"
+        if dim % 8 != 0:
+            raise ValueError(
+                "The hidden_size + number of attention heads must be divisible by 8 as the number of groups in GroupNorm is set to 8"
+            )
         inter_dims = [dim, context_dim // 2, context_dim // 4, context_dim // 8, context_dim // 16, context_dim // 64]
@@ -1897,7 +1901,8 @@ class DetrLoss(nn.Module):
         Classification loss (NLL) targets dicts must contain the key "class_labels" containing a tensor of dim
         [nb_target_boxes]
         """
-        assert "logits" in outputs, "No logits were found in the outputs"
+        if "logits" not in outputs:
+            raise KeyError("No logits were found in the outputs")
         src_logits = outputs["logits"]
         idx = self._get_src_permutation_idx(indices)
@@ -1935,7 +1940,8 @@ class DetrLoss(nn.Module):
         Targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]. The target boxes
         are expected in format (center_x, center_y, w, h), normalized by the image size.
         """
-        assert "pred_boxes" in outputs, "No predicted boxes found in outputs"
+        if "pred_boxes" not in outputs:
+            raise KeyError("No predicted boxes found in outputs")
         idx = self._get_src_permutation_idx(indices)
         src_boxes = outputs["pred_boxes"][idx]
         target_boxes = torch.cat([t["boxes"][i] for t, (_, i) in zip(targets, indices)], dim=0)
@@ -1957,7 +1963,8 @@ class DetrLoss(nn.Module):
         Targets dicts must contain the key "masks" containing a tensor of dim [nb_target_boxes, h, w].
         """
-        assert "pred_masks" in outputs, "No predicted masks found in outputs"
+        if "pred_masks" not in outputs:
+            raise KeyError("No predicted masks found in outputs")
         src_idx = self._get_src_permutation_idx(indices)
         tgt_idx = self._get_tgt_permutation_idx(indices)
@@ -2002,7 +2009,8 @@ class DetrLoss(nn.Module):
             "boxes": self.loss_boxes,
             "masks": self.loss_masks,
         }
-        assert loss in loss_map, f"Loss {loss} not supported"
+        if loss not in loss_map:
+            raise ValueError(f"Loss {loss} not supported")
         return loss_map[loss](outputs, targets, indices, num_boxes)
     def forward(self, outputs, targets):
@@ -2097,7 +2105,8 @@ class DetrHungarianMatcher(nn.Module):
         self.class_cost = class_cost
         self.bbox_cost = bbox_cost
         self.giou_cost = giou_cost
-        assert class_cost != 0 or bbox_cost != 0 or giou_cost != 0, "All costs of the Matcher can't be 0"
+        if class_cost == 0 or bbox_cost == 0 or giou_cost == 0:
+            raise ValueError("All costs of the Matcher can't be 0")
     @torch.no_grad()
     def forward(self, outputs, targets):
......
@@ -176,7 +176,8 @@ class DPREncoder(DPRPreTrainedModel):
     def __init__(self, config: DPRConfig):
         super().__init__(config)
         self.bert_model = BertModel(config, add_pooling_layer=False)
-        assert self.bert_model.config.hidden_size > 0, "Encoder hidden_size can't be zero"
+        if self.bert_model.config.hidden_size <= 0:
+            raise ValueError("Encoder hidden_size can't be zero")
         self.projection_dim = config.projection_dim
         if self.projection_dim > 0:
             self.encode_proj = nn.Linear(self.bert_model.config.hidden_size, config.projection_dim)
......
@@ -156,7 +156,8 @@ class TFDPREncoderLayer(tf.keras.layers.Layer):
         self.bert_model = TFBertMainLayer(config, add_pooling_layer=False, name="bert_model")
         self.config = config
-        assert self.config.hidden_size > 0, "Encoder hidden_size can't be zero"
+        if self.config.hidden_size <= 0:
+            raise ValueError("Encoder hidden_size can't be zero")
         self.projection_dim = config.projection_dim
         if self.projection_dim > 0:
             self.encode_proj = tf.keras.layers.Dense(
......
@@ -234,9 +234,10 @@ class CustomDPRReaderTokenizerMixin:
         texts = texts if not isinstance(texts, str) else [texts]
         n_passages = len(titles)
         questions = questions if not isinstance(questions, str) else [questions] * n_passages
-        assert len(titles) == len(
-            texts
-        ), f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts."
+        if len(titles) != len(texts):
+            raise ValueError(
+                f"There should be as many titles than texts but got {len(titles)} titles and {len(texts)} texts."
+            )
         encoded_question_and_titles = super().__call__(questions, titles, padding=False, truncation=False)["input_ids"]
         encoded_texts = super().__call__(texts, add_special_tokens=False, padding=False, truncation=False)["input_ids"]
         encoded_inputs = {
@@ -347,9 +348,11 @@ class CustomDPRReaderTokenizerMixin:
         scores = sorted(scores, key=lambda x: x[1], reverse=True)
         chosen_span_intervals = []
         for (start_index, end_index), score in scores:
-            assert start_index <= end_index, f"Wrong span indices: [{start_index}:{end_index}]"
+            if start_index > end_index:
+                raise ValueError(f"Wrong span indices: [{start_index}:{end_index}]")
             length = end_index - start_index + 1
-            assert length <= max_answer_length, f"Span is too long: {length} > {max_answer_length}"
+            if length > max_answer_length:
+                raise ValueError(f"Span is too long: {length} > {max_answer_length}")
             if any(
                 [
                     start_index <= prev_start_index <= prev_end_index <= end_index
......
@@ -115,13 +115,9 @@ def load_tf_weights_in_gpt_neo(model, config, gpt_neo_checkpoint_path):
             # if vocab is padded, then trim off the padding embeddings
            array = array[: config.vocab_size]
-        try:
-            assert (
-                pointer.shape == array.shape
-            ), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched {name}"
-        except AssertionError as e:
-            e.args += (pointer.shape, array.shape)
-            raise
+        if pointer.shape != array.shape:
+            raise ValueError(f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched {name}")
         print(f"Initialize PyTorch weight {name}")
         pointer.data = torch.from_numpy(array)
@@ -552,7 +548,8 @@ class GPTNeoModel(GPTNeoPreTrainedModel):
         # Attention mask.
         if attention_mask is not None:
-            assert batch_size > 0, "batch_size has to be defined and > 0"
+            if batch_size <= 0:
+                raise ValueError("batch_size has to be defined and > 0")
             attention_mask = attention_mask.view(batch_size, -1)
             # We create a 3D attention mask from a 2D tensor mask.
             # Sizes are [batch_size, 1, 1, to_seq_length]
@@ -875,9 +872,8 @@ class GPTNeoForSequenceClassification(GPTNeoPreTrainedModel):
         else:
            batch_size, sequence_length = inputs_embeds.shape[:2]
-        assert (
-            self.config.pad_token_id is not None or batch_size == 1
-        ), "Cannot handle batch sizes > 1 if no padding token is defined."
+        if self.config.pad_token_id is None and batch_size != 1:
+            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             sequence_lengths = -1
         else:
......
@@ -573,7 +573,8 @@ class GPTJModel(GPTJPreTrainedModel):
         # Attention mask.
         if attention_mask is not None:
-            assert batch_size > 0, "batch_size has to be defined and > 0"
+            if batch_size <= 0:
+                raise ValueError("batch_size has to be defined and > 0")
             attention_mask = attention_mask.view(batch_size, -1)
             # We create a 3D attention mask from a 2D tensor mask.
             # Sizes are [batch_size, 1, 1, to_seq_length]
@@ -939,9 +940,8 @@ class GPTJForSequenceClassification(GPTJPreTrainedModel):
         else:
             batch_size = inputs_embeds.shape[0]
-        assert (
-            self.config.pad_token_id is not None or batch_size == 1
-        ), "Cannot handle batch sizes > 1 if no padding token is defined."
+        if self.config.pad_token_id is None and batch_size != 1:
+            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
         if self.config.pad_token_id is None:
             sequence_lengths = -1
         else:
......
@@ -934,9 +934,8 @@ class TFGPTJForSequenceClassification(TFGPTJPreTrainedModel, TFSequenceClassific
         loss = None
         if labels is not None:
-            assert (
-                self.config.pad_token_id is not None or logits_shape[0] == 1
-            ), "Cannot handle batch sizes > 1 if no padding token is defined."
+            if self.config.pad_token_id is None and logits_shape[0] != 1:
+                raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
             if not tf.is_tensor(sequence_lengths):
                 in_logits = logits[0 : logits_shape[0], sequence_lengths]
......
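
For callers, the practical difference is that invalid inputs now surface as specific, catchable exceptions instead of a bare AssertionError (which also disappears under `python -O`). A small caller-side sketch; the helper below is hypothetical and only mirrors one of the converted checks, it is not part of the library:

def validate_pad_token(pad_token_id, batch_size: int) -> None:
    # Hypothetical helper mirroring the sequence-classification checks above.
    if pad_token_id is None and batch_size != 1:
        raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")


try:
    validate_pad_token(pad_token_id=None, batch_size=8)
except ValueError as err:
    # With the old assert, this would have been AssertionError (or no check at all under `python -O`).
    print(f"Invalid configuration: {err}")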