Make errors for loss-less models more user-friendly (#18233)

ba552dd0 · Sylvain Gugger · GitHub · 43a5375c · ba552dd0
Unverified Commit ba552dd0 authored Jul 21, 2022 by Sylvain Gugger Committed by GitHub Jul 21, 2022
Show whitespace changes
Inline Side-by-side

Showing with 14 additions and 1 deletion

src/transformers/trainer.py src/transformers/trainer.py +14 -1

No files found.
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -69,7 +69,7 @@ from .deepspeed import deepspeed_init, is_deepspeed_zero3_enabled
 from .dependency_versions_check import dep_version_check
 from .modelcard import TrainingSummary
 from .modeling_utils import PreTrainedModel, load_sharded_checkpoint, unwrap_model
-from .models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
+from .models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_MAPPING_NAMES
 from .optimization import Adafactor, get_scheduler
 from .pytorch_utils import ALL_LAYERNORM_LAYERS
 from .tokenization_utils_base import PreTrainedTokenizerBase
@@ -351,6 +351,14 @@ class Trainer:
                )
            self.model_init = model_init
+        if model.__class__.__name__ in MODEL_MAPPING_NAMES:
+            raise ValueError(
+                f"The model you have picked ({model.__class__.__name__}) cannot be used as is for training: it only "
+                "computes hidden states and does not accept any labels. You should choose a model with a head "
+                "suitable for your task like any of the `AutoModelForXxx` listed at "
+                "https://huggingface.co/docs/transformers/model_doc/auto."
+            )
        if hasattr(model, "is_parallelizable") and model.is_parallelizable and model.model_parallel:
            self.is_model_parallel = True
        else:
@@ -2503,6 +2511,11 @@ class Trainer:
            else:
                loss = self.label_smoother(outputs, labels)
        else:
+            if isinstance(outputs, dict) and "loss" not in outputs:
+                raise ValueError(
+                    "The model did not return a loss from the inputs, only the following keys: "
+                    f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}."
+                )
            # We don't use .loss here since the model may return tuples instead of ModelOutput.
            loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0]