Merge branch 'master' into t5

7c3a15ac · thomwolf · 981a5c8c · e6cff60b · 7c3a15ac · 7c3a15ac
Commit 7c3a15ac authored Dec 10, 2019 by thomwolf
3 changed files
--- a/examples/README.md
+++ b/examples/README.md
@@ -24,8 +24,6 @@ pip install -r ./examples/requirements.txt
 | [Multiple Choice](#multiple-choice) | Examples running BERT/XLNet/RoBERTa on the SWAG/RACE/ARC tasks. 
 | [Named Entity Recognition](#named-entity-recognition) | Using BERT for Named Entity Recognition (NER) on the CoNLL 2003 dataset, examples with distributed training.                                                                                  |
 | [XNLI](#xnli) | Examples running BERT/XLM on the XNLI benchmark. |
-| [Abstractive summarization](#abstractive-summarization) | Using the BertAbs
-model finetuned on the CNN/DailyMail dataset to generate summaries. |

 ## TensorFlow 2.0 Bert models on GLUE

@@ -646,34 +644,6 @@ micro avg     0.8722    0.8774    0.8748     13869
 macro avg     0.8712    0.8774    0.8740     13869
 ```

-## Abstractive summarization
-
-Based on the script
-[`run_summarization_finetuning.py`](https://github.com/huggingface/transformers/blob/master/examples/run_summarization_finetuning.py).
-
-Before running this script you should download **both** CNN and Daily Mail
-datasets from [Kyunghyun Cho's website](https://cs.nyu.edu/~kcho/DMQA/)  (the
-links next to "Stories") in the same folder. Then uncompress the archives by running:
-
-```bash
-tar -xvf cnn_stories.tgz && tar -xvf dailymail_stories.tgz
-```
-
-note that the finetuning script **will not work** if you do not download both
-datasets. We will refer as `$DATA_PATH` the path to where you uncompressed both
-archive.
-
-```bash
-export DATA_PATH=/path/to/dataset/
-
-python run_summarization_finetuning.py \
-    --output_dir=output \
-    --model_type=bert2bert \
-    --model_name_or_path=bert2bert \
-    --do_train \
-    --data_path=$DATA_PATH \
-```
-
 ## XNLI

 Based on the script [`run_xnli.py`](https://github.com/huggingface/transformers/blob/master/examples/run_xnli.py).

--- a/transformers/convert_pytorch_checkpoint_to_tf2.py
+++ b/transformers/convert_pytorch_checkpoint_to_tf2.py
@@ -124,10 +124,11 @@ def convert_pt_checkpoint_to_tf(model_type, pytorch_checkpoint_path, config_file
        tf_inputs = tf_model.dummy_inputs
        tfo = tf_model(tf_inputs, training=False)  # build the network

-        pt_model = pt_model_class.from_pretrained(None,
+        state_dict = torch.load(pytorch_checkpoint_path, map_location='cpu')
+        pt_model = pt_model_class.from_pretrained(pretrained_model_name_or_path=None,
                                                  config=config,
-                                                  state_dict=torch.load(pytorch_checkpoint_path,
-                                                                        map_location='cpu'))
+                                                  state_dict=state_dict)
+
        pt_inputs = torch.tensor(inputs_list)
        with torch.no_grad():
            pto = pt_model(pt_inputs)
@@ -144,7 +145,7 @@ def convert_pt_checkpoint_to_tf(model_type, pytorch_checkpoint_path, config_file


 def convert_all_pt_checkpoints_to_tf(args_model_type, tf_dump_path, model_shortcut_names_or_path=None, config_shortcut_names_or_path=None,
-                                     compare_with_pt_model=False, use_cached_models=False, only_convert_finetuned_models=False):
+                                     compare_with_pt_model=False, use_cached_models=False, remove_cached_files=False, only_convert_finetuned_models=False):
    assert os.path.isdir(args.tf_dump_path), "--tf_dump_path should be a directory"

    if args_model_type is None:
@@ -192,13 +193,15 @@ def convert_all_pt_checkpoints_to_tf(args_model_type, tf_dump_path, model_shortc

            if os.path.isfile(model_shortcut_name):
                model_shortcut_name = 'converted_model'
+
            convert_pt_checkpoint_to_tf(model_type=model_type,
                                        pytorch_checkpoint_path=model_file,
                                        config_file=config_file,
                                        tf_dump_path=os.path.join(tf_dump_path, model_shortcut_name + '-tf_model.h5'),
                                        compare_with_pt_model=compare_with_pt_model)
-            os.remove(config_file)
-            os.remove(model_file)
+            if remove_cached_files:
+                os.remove(config_file)
+                os.remove(model_file)


 if __name__ == "__main__":
@@ -231,6 +234,9 @@ if __name__ == "__main__":
    parser.add_argument("--use_cached_models",
                        action='store_true',
                        help = "Use cached models if possible instead of updating to latest checkpoint versions.")
+    parser.add_argument("--remove_cached_files",
+                        action='store_true',
+                        help = "Remove pytorch models after conversion (save memory when converting in batches).")
    parser.add_argument("--only_convert_finetuned_models",
                        action='store_true',
                        help = "Only convert finetuned models.")
@@ -250,4 +256,5 @@ if __name__ == "__main__":
                                        config_shortcut_names_or_path=[args.config_file] if args.config_file is not None else None,
                                        compare_with_pt_model=args.compare_with_pt_model,
                                        use_cached_models=args.use_cached_models,
+                                        remove_cached_files=args.remove_cached_files,
                                        only_convert_finetuned_models=args.only_convert_finetuned_models)
--- a/transformers/modeling_utils.py
+++ b/transformers/modeling_utils.py
@@ -317,7 +317,8 @@ class PreTrainedModel(nn.Module):
            model = BertModel.from_pretrained('./tf_model/my_tf_checkpoint.ckpt.index', from_tf=True, config=config)

        """
-        if "albert" in pretrained_model_name_or_path and "v2" in pretrained_model_name_or_path:
+        if pretrained_model_name_or_path is not None and (
+                "albert" in pretrained_model_name_or_path and "v2" in pretrained_model_name_or_path):
            logger.warning("There is currently an upstream reproducibility issue with ALBERT v2 models. Please see " +
                           "https://github.com/google-research/google-research/issues/119 for more information.")