"tests/test_doc_samples.py" did not exist on "90b7df444fc30d5f476e5ab32d1f89340998a28d"
Unverified Commit 5a0dac53 authored by Teven's avatar Teven Committed by GitHub
Browse files

Empty assert hunt (#6056)



* Fixed empty asserts

* black-reformatted stragglers in templates

* More code quality checks

* Update src/transformers/convert_marian_to_pytorch.py
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/convert_marian_to_pytorch.py
Co-authored-by: default avatarSam Shleifer <sshleifer@gmail.com>

* removed unused line as per @sshleifer
Co-authored-by: default avatarSylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: default avatarSam Shleifer <sshleifer@gmail.com>
parent 16c22401
......@@ -169,11 +169,15 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
array = np.transpose(array)
if isinstance(pointer, list):
# Here we will split the TF weights
assert len(pointer) == array.shape[0]
assert (
len(pointer) == array.shape[0]
), f"Pointer length {len(pointer)} and array length {array.shape[0]} mismatched"
for i, p_i in enumerate(pointer):
arr_i = array[i, ...]
try:
assert p_i.shape == arr_i.shape
assert (
p_i.shape == arr_i.shape
), f"Pointer shape {p_i.shape} and array shape {arr_i.shape} mismatched"
except AssertionError as e:
e.args += (p_i.shape, arr_i.shape)
raise
......@@ -181,7 +185,9 @@ def load_tf_weights_in_xlnet(model, config, tf_path):
p_i.data = torch.from_numpy(arr_i)
else:
try:
assert pointer.shape == array.shape
assert (
pointer.shape == array.shape
), f"Pointer shape {pointer.shape} and array shape {array.shape} mismatched"
except AssertionError as e:
e.args += (pointer.shape, array.shape)
raise
......
......@@ -147,7 +147,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
def count_file(self, path, verbose=False, add_eos=False):
if verbose:
logger.info("counting file {} ...".format(path))
assert os.path.exists(path)
assert os.path.exists(path), f"Input file {path} not found"
sents = []
with open(path, "r", encoding="utf-8") as f:
......@@ -233,7 +233,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
def encode_file(self, path, ordered=False, verbose=False, add_eos=True, add_double_eos=False):
if verbose:
logger.info("encoding file {} ...".format(path))
assert os.path.exists(path)
assert os.path.exists(path), f"Output file {path} not found"
encoded = []
with open(path, "r", encoding="utf-8") as f:
for idx, line in enumerate(f):
......
......@@ -683,7 +683,8 @@ class SpecialTokensMixin:
for key, value in kwargs.items():
if key in self.SPECIAL_TOKENS_ATTRIBUTES:
if key == "additional_special_tokens":
assert isinstance(value, (list, tuple)) and all(isinstance(t, str) for t in value)
assert isinstance(value, (list, tuple)), f"Value {value} is not a list or tuple"
assert all(isinstance(t, str) for t in value), "One of the tokens is not a string"
setattr(self, key, value)
elif isinstance(value, (str, AddedToken)):
setattr(self, key, value)
......@@ -752,7 +753,7 @@ class SpecialTokensMixin:
added_tokens = 0
for key, value in special_tokens_dict.items():
assert key in self.SPECIAL_TOKENS_ATTRIBUTES
assert key in self.SPECIAL_TOKENS_ATTRIBUTES, f"Key {key} is not a special token"
if self.verbose:
logger.info("Assigning %s to the %s key of the tokenizer", value, key)
......
......@@ -124,11 +124,15 @@ class SequentialDistributedSampler(Sampler):
# add extra samples to make it evenly divisible
indices += indices[: (self.total_size - len(indices))]
assert len(indices) == self.total_size
assert (
len(indices) == self.total_size
), f"Indices length {len(indices)} and total size {self.total_size} mismatched"
# subsample
indices = indices[self.rank * self.num_samples : (self.rank + 1) * self.num_samples]
assert len(indices) == self.num_samples
assert (
len(indices) == self.num_samples
), f"Indices length {len(indices)} and sample number {self.num_samples} mismatched"
return iter(indices)
......@@ -566,9 +570,11 @@ class Trainer:
# In all cases (even distributed/parallel), self.model is always a reference
# to the model we want to save.
if hasattr(model, "module"):
assert model.module is self.model
assert (
model.module is self.model
), f"Module {model.module} should be a reference to self.model"
else:
assert model is self.model
assert model is self.model, f"Model {model} should be a reference to self.model"
# Save model checkpoint
output_dir = os.path.join(self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.global_step}")
......
......@@ -327,9 +327,15 @@ def convert_examples_to_features(
segment_ids.append(pad_token_segment_id)
p_mask.append(1)
assert len(input_ids) == max_seq_length
assert len(input_mask) == max_seq_length
assert len(segment_ids) == max_seq_length
assert (
len(input_ids) == max_seq_length
), f"Input ids and sequence have mismatched lengths {len(input_ids)} and {max_seq_length}"
assert (
len(input_mask) == max_seq_length
), f"Input mask and sequence have mismatched lengths {len(input_mask)} and {max_seq_length}"
assert (
len(segment_ids) == max_seq_length
), f"Segment ids and sequence have mismatched lengths {len(segment_ids)} and {max_seq_length}"
span_is_impossible = example.is_impossible
start_position = None
......@@ -626,7 +632,7 @@ def write_predictions(
if not nbest:
nbest.append(_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
assert len(nbest) >= 1
assert len(nbest) >= 1, "No valid predictions"
total_scores = []
best_non_null_entry = None
......@@ -647,7 +653,7 @@ def write_predictions(
output["end_logit"] = entry.end_logit
nbest_json.append(output)
assert len(nbest_json) >= 1
assert len(nbest_json) >= 1, "No valid predictions"
if not version_2_with_negative:
all_predictions[example.qas_id] = nbest_json[0]["text"]
......@@ -843,8 +849,8 @@ def write_predictions_extended(
output["end_log_prob"] = entry.end_log_prob
nbest_json.append(output)
assert len(nbest_json) >= 1
assert best_non_null_entry is not None
assert len(nbest_json) >= 1, "No valid predictions"
assert best_non_null_entry is not None, "No valid predictions"
score_diff = score_null
scores_diff_json[example.qas_id] = score_diff
......
......@@ -121,7 +121,9 @@ def load_tf_weights_in_xxx(model, config, tf_checkpoint_path):
elif m_name == "kernel":
array = np.transpose(array)
try:
assert pointer.shape == array.shape
assert (
pointer.shape == array.shape
), f"Pointer and array have mismatched shapes {pointer.shape} and {array.shape}"
except AssertionError as e:
e.args += (pointer.shape, array.shape)
raise
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment