Unverified Commit 54659048, authored by amyeroberts and committed by GitHub

Early labels validation (#31240)

* Move label validation checks - fail early

* Remove some formatting changes - add back the labels change for wav2vec2
parent 03ea1609
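
The change is mechanical across every file touched below: each `labels` sanity check moves from the loss branch at the bottom of `forward` up to the top, so invalid labels raise before the expensive model body runs. A minimal sketch of the pattern, using a hypothetical toy module rather than any of the transformers classes in this diff:

```python
from typing import Optional

import torch
from torch import nn


class ToyCTCHead(nn.Module):
    """Hypothetical stand-in for a *ForCTC model; only the validation pattern matters."""

    def __init__(self, vocab_size: int = 32):
        super().__init__()
        self.vocab_size = vocab_size
        self.encoder = nn.Linear(16, vocab_size)  # stand-in for the expensive backbone

    def forward(self, input_values: torch.Tensor, labels: Optional[torch.Tensor] = None):
        # Fail early: validate `labels` before any compute, mirroring this PR.
        if labels is not None and labels.max() >= self.vocab_size:
            raise ValueError(f"Label values must be <= vocab_size: {self.vocab_size}")

        logits = self.encoder(input_values)  # previously the error surfaced only after this

        loss = None
        if labels is not None:
            # cross_entropy expects (batch, classes, ...), so move vocab to dim 1
            loss = nn.functional.cross_entropy(logits.transpose(1, 2), labels)
        return loss, logits
```
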
@@ -763,6 +763,12 @@ class BarkCausalModel(BarkPreTrainedModel):
         use_cache = use_cache if use_cache is not None else self.config.use_cache
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
+        loss = None
+        if labels is not None:
+            raise NotImplementedError(
+                "Training is not implemented yet for Bark - ensure you do not pass `labels` to the model."
+            )
+
         # Verify if input_embeds already exists
         # then compute embeddings.
         if input_ids is not None and input_embeds is not None:
@@ -870,12 +876,6 @@ class BarkCausalModel(BarkPreTrainedModel):
         logits = self.lm_head(hidden_states)
 
-        loss = None
-        if labels is not None:
-            raise NotImplementedError(
-                "Training is not implemented yet for Bark - ensure you do not pass `labels` to the model."
-            )
-
         if not return_dict:
             return tuple(
                 v for v in [None, logits, present_key_values, all_hidden_states, all_self_attentions] if v is not None
@@ -1393,6 +1393,10 @@ class BarkFineModel(BarkPreTrainedModel):
         )
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
+        loss = None
+        if labels is not None:
+            raise NotImplementedError("Training is not implemented yet")
+
         if codebook_idx == 0:
             raise ValueError("Cannot predict 0th codebook - 0th codebook should be predicted by the coarse model")
@@ -1470,10 +1474,6 @@ class BarkFineModel(BarkPreTrainedModel):
         logits = self.lm_heads[codebook_idx - self.config.n_codes_given](hidden_states)
 
-        loss = None
-        if labels is not None:
-            raise NotImplementedError("Training is not implemented yet")
-
         if not return_dict:
             return tuple(v for v in [None, logits, all_hidden_states, all_self_attentions] if v is not None)
...
@@ -1247,6 +1247,9 @@ class BeitForSemanticSegmentation(BeitPreTrainedModel):
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
         )
 
+        if labels is not None and self.config.num_labels == 1:
+            raise ValueError("The number of labels should be greater than one")
+
         outputs = self.beit(
             pixel_values,
             head_mask=head_mask,
@@ -1279,9 +1282,6 @@ class BeitForSemanticSegmentation(BeitPreTrainedModel):
         loss = None
         if labels is not None:
-            if self.config.num_labels == 1:
-                raise ValueError("The number of labels should be greater than one")
-            else:
-                loss = self.compute_loss(logits, auxiliary_logits, labels)
+            loss = self.compute_loss(logits, auxiliary_logits, labels)
 
         if not return_dict:
...
@@ -1372,9 +1372,11 @@ class Data2VecAudioForCTC(Data2VecAudioPreTrainedModel):
             All labels set to `-100` are ignored (masked), the loss is only computed for labels in `[0, ...,
             config.vocab_size - 1]`.
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
+        if labels is not None and labels.max() >= self.config.vocab_size:
+            raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")
+
         outputs = self.data2vec_audio(
             input_values,
             attention_mask=attention_mask,
@@ -1390,9 +1392,6 @@ class Data2VecAudioForCTC(Data2VecAudioPreTrainedModel):
         loss = None
         if labels is not None:
-            if labels.max() >= self.config.vocab_size:
-                raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")
-
             # retrieve loss input_lengths from attention_mask
             attention_mask = (
                 attention_mask if attention_mask is not None else torch.ones_like(input_values, dtype=torch.long)
...
@@ -1173,6 +1173,9 @@ class Data2VecVisionForSemanticSegmentation(Data2VecVisionPreTrainedModel):
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
         )
 
+        if labels is not None and self.config.num_labels == 1:
+            raise ValueError("The number of labels should be greater than one")
+
         outputs = self.data2vec_vision(
             pixel_values,
             head_mask=head_mask,
@@ -1205,9 +1208,6 @@ class Data2VecVisionForSemanticSegmentation(Data2VecVisionPreTrainedModel):
         loss = None
         if labels is not None:
-            if self.config.num_labels == 1:
-                raise ValueError("The number of labels should be greater than one")
-            else:
-                loss = self.compute_loss(logits, auxiliary_logits, labels)
+            loss = self.compute_loss(logits, auxiliary_logits, labels)
 
         if not return_dict:
...
@@ -1633,6 +1633,9 @@ class TFData2VecVisionForSemanticSegmentation(TFData2VecVisionPreTrainedModel):
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
         )
 
+        if labels is not None and self.config.num_labels == 1:
+            raise ValueError("The number of labels should be greater than one")
+
         outputs = self.data2vec_vision(
             pixel_values,
             head_mask=head_mask,
@@ -1672,9 +1675,6 @@ class TFData2VecVisionForSemanticSegmentation(TFData2VecVisionPreTrainedModel):
         loss = None
         if labels is not None:
-            if self.config.num_labels == 1:
-                raise ValueError("The number of labels should be greater than one")
-            else:
-                loss = self.compute_loss(logits, auxiliary_logits, labels)
+            loss = self.compute_loss(logits, auxiliary_logits, labels)
 
         if not return_dict:
...
@@ -732,6 +732,8 @@ class MCTCTForCTC(MCTCTPreTrainedModel):
             All labels set to `-100` are ignored (masked), the loss is only computed for labels in `[0, ...,
             config.vocab_size - 1]`.
         """
+        if labels is not None and labels.max() >= self.config.vocab_size:
+            raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
         outputs = self.mctct(
@@ -749,9 +751,6 @@ class MCTCTForCTC(MCTCTPreTrainedModel):
         loss = None
         if labels is not None:
-            if labels.max() >= self.config.vocab_size:
-                raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")
-
             # retrieve loss input_lengths from attention_mask
             attention_mask = (
                 attention_mask
...
@@ -1440,9 +1440,13 @@ class RealmKnowledgeAugEncoder(RealmPreTrainedModel):
         >>> outputs = model(**inputs)
         >>> logits = outputs.logits
         ```"""
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
+        if labels is not None and relevance_score is None:
+            raise ValueError(
+                "You have to specify `relevance_score` when `labels` is specified in order to compute loss."
+            )
+
         (flattened_input_ids, flattened_attention_mask, flattened_token_type_ids) = self._flatten_inputs(
             input_ids, attention_mask, token_type_ids
         )
@@ -1468,11 +1472,6 @@ class RealmKnowledgeAugEncoder(RealmPreTrainedModel):
         masked_lm_loss = None
         if labels is not None:
-            if candidate_score is None:
-                raise ValueError(
-                    "You have to specify `relevance_score` when `labels` is specified in order to compute loss."
-                )
-
             batch_size, seq_length = labels.size()
 
             if mlm_mask is None:
...
@@ -424,6 +424,10 @@ class DepthAnythingForDepthEstimation(DepthAnythingPreTrainedModel):
         >>> formatted = (output * 255 / np.max(output)).astype("uint8")
         >>> depth = Image.fromarray(formatted)
         ```"""
+        loss = None
+        if labels is not None:
+            raise NotImplementedError("Training is not implemented yet")
+
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -444,10 +448,6 @@ class DepthAnythingForDepthEstimation(DepthAnythingPreTrainedModel):
         predicted_depth = self.head(hidden_states, patch_height, patch_width)
 
-        loss = None
-        if labels is not None:
-            raise NotImplementedError("Training is not implemented yet")
-
         if not return_dict:
             if output_hidden_states:
                 output = (predicted_depth,) + outputs[1:]
...
@@ -1136,6 +1136,10 @@ class DPTForDepthEstimation(DPTPreTrainedModel):
         >>> formatted = (output * 255 / np.max(output)).astype("uint8")
         >>> depth = Image.fromarray(formatted)
         ```"""
+        loss = None
+        if labels is not None:
+            raise NotImplementedError("Training is not implemented yet")
+
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
@@ -1183,10 +1187,6 @@ class DPTForDepthEstimation(DPTPreTrainedModel):
         predicted_depth = self.head(hidden_states)
 
-        loss = None
-        if labels is not None:
-            raise NotImplementedError("Training is not implemented yet")
-
         if not return_dict:
             if output_hidden_states:
                 output = (predicted_depth,) + outputs[1:]
@@ -1308,6 +1308,9 @@ class DPTForSemanticSegmentation(DPTPreTrainedModel):
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
         )
 
+        if labels is not None and self.config.num_labels == 1:
+            raise ValueError("The number of labels should be greater than one")
+
         outputs = self.dpt(
             pixel_values,
             head_mask=head_mask,
@@ -1342,9 +1345,6 @@ class DPTForSemanticSegmentation(DPTPreTrainedModel):
         loss = None
         if labels is not None:
-            if self.config.num_labels == 1:
-                raise ValueError("The number of labels should be greater than one")
-            else:
-                # upsample logits to the images' original size
-                upsampled_logits = nn.functional.interpolate(
-                    logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
+            # upsample logits to the images' original size
+            upsampled_logits = nn.functional.interpolate(
+                logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
...
@@ -921,6 +921,8 @@ class TFGPTJForSequenceClassification(TFGPTJPreTrainedModel, TFSequenceClassific
             config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
             `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
         """
+        if labels is not None and self.config.pad_token_id is None and input_ids.shape[0] != 1:
+            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
 
         transformer_outputs = self.transformer(
             input_ids=input_ids,
@@ -963,9 +965,6 @@ class TFGPTJForSequenceClassification(TFGPTJPreTrainedModel, TFSequenceClassific
         loss = None
         if labels is not None:
-            if self.config.pad_token_id is None and logits_shape[0] != 1:
-                raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
-
             if not tf.is_tensor(sequence_lengths):
                 in_logits = logits[0 : logits_shape[0], sequence_lengths]
...
@@ -1574,9 +1574,11 @@ class HubertForCTC(HubertPreTrainedModel):
             All labels set to `-100` are ignored (masked), the loss is only computed for labels in `[0, ...,
             config.vocab_size - 1]`.
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
+        if labels is not None and labels.max() >= self.config.vocab_size:
+            raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")
+
         outputs = self.hubert(
             input_values,
             attention_mask=attention_mask,
@@ -1592,9 +1594,6 @@ class HubertForCTC(HubertPreTrainedModel):
         loss = None
         if labels is not None:
-            if labels.max() >= self.config.vocab_size:
-                raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")
-
             # retrieve loss input_lengths from attention_mask
             attention_mask = (
                 attention_mask if attention_mask is not None else torch.ones_like(input_values, dtype=torch.long)
...
@@ -1600,6 +1600,8 @@ class TFHubertForCTC(TFHubertPreTrainedModel):
         >>> loss = model(input_values, labels=labels).loss
         ```"""
+        if labels is not None and tf.reduce_max(labels) >= self.config.vocab_size:
+            raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")
 
         outputs = self.hubert(
             input_values=input_values,
@@ -1619,9 +1621,6 @@ class TFHubertForCTC(TFHubertPreTrainedModel):
         logits = self.lm_head(hidden_states)
 
         if labels is not None:
-            if tf.reduce_max(labels) >= self.config.vocab_size:
-                raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")
-
             attention_mask = (
                 attention_mask if attention_mask is not None else tf.ones_like(input_values, dtype=tf.float32)
             )
...
@@ -822,6 +822,9 @@ class MobileNetV2ForSemanticSegmentation(MobileNetV2PreTrainedModel):
         )
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
+        if labels is not None and self.config.num_labels == 1:
+            raise ValueError("The number of labels should be greater than one")
+
         outputs = self.mobilenet_v2(
             pixel_values,
             output_hidden_states=True,  # we need the intermediate hidden states
@@ -834,9 +837,6 @@ class MobileNetV2ForSemanticSegmentation(MobileNetV2PreTrainedModel):
         loss = None
         if labels is not None:
-            if self.config.num_labels == 1:
-                raise ValueError("The number of labels should be greater than one")
-            else:
-                # upsample logits to the images' original size
-                upsampled_logits = nn.functional.interpolate(
-                    logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
+            # upsample logits to the images' original size
+            upsampled_logits = nn.functional.interpolate(
+                logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
...
@@ -1026,6 +1026,9 @@ class MobileViTForSemanticSegmentation(MobileViTPreTrainedModel):
         )
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
+        if labels is not None and self.config.num_labels == 1:
+            raise ValueError("The number of labels should be greater than one")
+
         outputs = self.mobilevit(
             pixel_values,
             output_hidden_states=True,  # we need the intermediate hidden states
@@ -1038,9 +1041,6 @@ class MobileViTForSemanticSegmentation(MobileViTPreTrainedModel):
         loss = None
         if labels is not None:
-            if self.config.num_labels == 1:
-                raise ValueError("The number of labels should be greater than one")
-            else:
-                # upsample logits to the images' original size
-                upsampled_logits = nn.functional.interpolate(
-                    logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
+            # upsample logits to the images' original size
+            upsampled_logits = nn.functional.interpolate(
+                logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
...
@@ -1323,6 +1323,9 @@ class TFMobileViTForSemanticSegmentation(TFMobileViTPreTrainedModel):
         )
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
+        if labels is not None and not self.config.num_labels > 1:
+            raise ValueError("The number of labels should be greater than one")
+
         outputs = self.mobilevit(
             pixel_values,
             output_hidden_states=True,  # we need the intermediate hidden states
@@ -1336,9 +1339,6 @@ class TFMobileViTForSemanticSegmentation(TFMobileViTPreTrainedModel):
         loss = None
         if labels is not None:
-            if not self.config.num_labels > 1:
-                raise ValueError("The number of labels should be greater than one")
-            else:
-                loss = self.hf_compute_loss(logits=logits, labels=labels)
+            loss = self.hf_compute_loss(logits=logits, labels=labels)
 
         # make logits of shape (batch_size, num_labels, height, width) to
...
@@ -990,6 +990,9 @@ class MobileViTV2ForSemanticSegmentation(MobileViTV2PreTrainedModel):
         )
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
+        if labels is not None and self.config.num_labels == 1:
+            raise ValueError("The number of labels should be greater than one")
+
         outputs = self.mobilevitv2(
             pixel_values,
             output_hidden_states=True,  # we need the intermediate hidden states
@@ -1002,9 +1005,6 @@ class MobileViTV2ForSemanticSegmentation(MobileViTV2PreTrainedModel):
         loss = None
         if labels is not None:
-            if self.config.num_labels == 1:
-                raise ValueError("The number of labels should be greater than one")
-            else:
-                # upsample logits to the images' original size
-                upsampled_logits = nn.functional.interpolate(
-                    logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
+            # upsample logits to the images' original size
+            upsampled_logits = nn.functional.interpolate(
+                logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
...
@@ -1740,6 +1740,10 @@ class PerceiverForOpticalFlow(PerceiverPreTrainedModel):
         ```"""
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
+        loss = None
+        if labels is not None:
+            raise NotImplementedError("Optical flow training is not yet supported")
+
         outputs = self.perceiver(
             inputs=inputs,
             attention_mask=attention_mask,
@@ -1750,10 +1754,6 @@ class PerceiverForOpticalFlow(PerceiverPreTrainedModel):
         )
         logits = outputs.logits if return_dict else outputs[0]
 
-        loss = None
-        if labels is not None:
-            raise NotImplementedError("Optical flow training is not yet supported")
-
         if not return_dict:
             output = (logits,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output
@@ -1974,6 +1974,10 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel):
         ```"""
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
+        loss = None
+        if labels is not None:
+            raise NotImplementedError("Multimodal autoencoding training is not yet supported")
+
         outputs = self.perceiver(
             inputs=inputs,
             attention_mask=attention_mask,
@@ -1985,10 +1989,6 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel):
         )
         logits = outputs.logits if return_dict else outputs[0]
 
-        loss = None
-        if labels is not None:
-            raise NotImplementedError("Multimodal autoencoding training is not yet supported")
-
         if not return_dict:
             output = (logits,) + outputs[2:]
             return ((loss,) + output) if loss is not None else output
...
@@ -784,6 +784,9 @@ class SegformerForSemanticSegmentation(SegformerPreTrainedModel):
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
         )
 
+        if labels is not None and self.config.num_labels < 1:
+            raise ValueError(f"Number of labels should be >=0: {self.config.num_labels}")
+
         outputs = self.segformer(
             pixel_values,
             output_attentions=output_attentions,
@@ -809,8 +812,6 @@ class SegformerForSemanticSegmentation(SegformerPreTrainedModel):
                 loss_fct = BCEWithLogitsLoss(reduction="none")
                 loss = loss_fct(upsampled_logits.squeeze(1), labels.float())
                 loss = (loss * valid_mask).mean()
-            else:
-                raise ValueError(f"Number of labels should be >=0: {self.config.num_labels}")
 
         if not return_dict:
             if output_hidden_states:
...
@@ -988,6 +988,9 @@ class TFSegformerForSemanticSegmentation(TFSegformerPreTrainedModel):
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
         )
 
+        if labels is not None and not self.config.num_labels > 1:
+            raise ValueError("The number of labels should be greater than one")
+
         outputs = self.segformer(
             pixel_values,
             output_attentions=output_attentions,
@@ -1001,9 +1004,6 @@ class TFSegformerForSemanticSegmentation(TFSegformerPreTrainedModel):
         loss = None
         if labels is not None:
-            if not self.config.num_labels > 1:
-                raise ValueError("The number of labels should be greater than one")
-            else:
-                loss = self.hf_compute_loss(logits=logits, labels=labels)
+            loss = self.hf_compute_loss(logits=logits, labels=labels)
 
         # make logits of shape (batch_size, num_labels, height, width) to
...
@@ -1418,9 +1418,11 @@ class SEWForCTC(SEWPreTrainedModel):
             All labels set to `-100` are ignored (masked), the loss is only computed for labels in `[0, ...,
             config.vocab_size - 1]`.
         """
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
+        if labels is not None and labels.max() >= self.config.vocab_size:
+            raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")
+
         outputs = self.sew(
             input_values,
             attention_mask=attention_mask,
@@ -1436,9 +1438,6 @@ class SEWForCTC(SEWPreTrainedModel):
         loss = None
         if labels is not None:
-            if labels.max() >= self.config.vocab_size:
-                raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")
-
             # retrieve loss input_lengths from attention_mask
             attention_mask = (
                 attention_mask if attention_mask is not None else torch.ones_like(input_values, dtype=torch.long)
...
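
For completeness, a hypothetical pytest sketch (not part of this PR) of what the hoisted checks enable: invalid labels now raise immediately, without paying for a full forward pass. The tiny `vocab_size` and the input shapes below are assumptions for illustration.

```python
import pytest
import torch

from transformers import HubertConfig, HubertForCTC


def test_labels_validated_before_forward():
    # Randomly initialized model; only vocab_size matters for this check.
    model = HubertForCTC(HubertConfig(vocab_size=32))
    bad_labels = torch.full((1, 4), 99, dtype=torch.long)  # 99 >= vocab_size
    with pytest.raises(ValueError, match="vocab_size"):
        # After this PR, the error is raised before self.hubert(...) runs.
        model(torch.randn(1, 4000), labels=bad_labels)
```
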