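WIP XLM + refactoring (adds XLM tokenizer/model exports; BERT imports move from `modeling`/`tokenization` to `modeling_bert`/`tokenization_bert`)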

c41f2bad · thomwolf · 288be7b7 · c41f2bad · c41f2bad · c41f2bad
Commit c41f2bad authored Jul 03, 2019 by thomwolf
20 changed files
--- a/examples/lm_finetuning/finetune_on_pregenerated.py
+++ b/examples/lm_finetuning/finetune_on_pregenerated.py
@@ -14,8 +14,8 @@ from torch.utils.data.distributed import DistributedSampler
 from tqdm import tqdm
 from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
-from pytorch_pretrained_bert.modeling import BertForPreTraining
+from pytorch_pretrained_bert.modeling_bert import BertForPreTraining
-from pytorch_pretrained_bert.tokenization import BertTokenizer
+from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
 from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 InputFeatures = namedtuple("InputFeatures", "input_ids input_mask segment_ids lm_label_ids is_next")

--- a/examples/lm_finetuning/pregenerate_training_data.py
+++ b/examples/lm_finetuning/pregenerate_training_data.py
@@ -5,7 +5,7 @@ from tempfile import TemporaryDirectory
 import shelve
 from random import random, randrange, randint, shuffle, choice
-from pytorch_pretrained_bert.tokenization import BertTokenizer
+from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
 import numpy as np
 import json
 import collections

--- a/examples/lm_finetuning/simple_lm_finetuning.py
+++ b/examples/lm_finetuning/simple_lm_finetuning.py
@@ -30,8 +30,8 @@ from torch.utils.data.distributed import DistributedSampler
 from tqdm import tqdm, trange
 from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
-from pytorch_pretrained_bert.modeling import BertForPreTraining
+from pytorch_pretrained_bert.modeling_bert import BertForPreTraining
-from pytorch_pretrained_bert.tokenization import BertTokenizer
+from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
 from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',

--- a/examples/run_bert_classifier.py
+++ b/examples/run_bert_classifier.py
@@ -35,8 +35,8 @@ from torch.nn import CrossEntropyLoss, MSELoss
 from tensorboardX import SummaryWriter
 from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
-from pytorch_pretrained_bert.modeling import BertForSequenceClassification
+from pytorch_pretrained_bert.modeling_bert import BertForSequenceClassification
-from pytorch_pretrained_bert.tokenization import BertTokenizer
+from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
 from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
 from utils_glue import processors, output_modes, convert_examples_to_features, compute_metrics

--- a/examples/run_bert_extract_features.py
+++ b/examples/run_bert_extract_features.py
@@ -28,8 +28,8 @@ import torch
 from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
 from torch.utils.data.distributed import DistributedSampler
-from pytorch_pretrained_bert.tokenization import BertTokenizer
+from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
-from pytorch_pretrained_bert.modeling import BertModel
+from pytorch_pretrained_bert.modeling_bert import BertModel
 logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s', 
                    datefmt = '%m/%d/%Y %H:%M:%S',

--- a/examples/run_bert_squad.py
+++ b/examples/run_bert_squad.py
@@ -34,9 +34,9 @@ from tqdm import tqdm, trange
 from tensorboardX import SummaryWriter
 from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
-from pytorch_pretrained_bert.modeling import BertForQuestionAnswering
+from pytorch_pretrained_bert.modeling_bert import BertForQuestionAnswering
 from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
-from pytorch_pretrained_bert.tokenization import BertTokenizer
+from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
 from utils_squad import read_squad_examples, convert_examples_to_features, RawResult, write_predictions

--- a/examples/run_bert_swag.py
+++ b/examples/run_bert_swag.py
@@ -33,9 +33,9 @@ from torch.utils.data.distributed import DistributedSampler
 from tqdm import tqdm, trange
 from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
-from pytorch_pretrained_bert.modeling import BertForMultipleChoice, BertConfig
+from pytorch_pretrained_bert.modeling_bert import BertForMultipleChoice, BertConfig
 from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
-from pytorch_pretrained_bert.tokenization import BertTokenizer
+from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
 logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',

--- a/examples/utils_squad.py
+++ b/examples/utils_squad.py
@@ -24,7 +24,7 @@ import math
 import collections
 from io import open
-from pytorch_pretrained_bert.tokenization import BasicTokenizer, whitespace_tokenize
+from pytorch_pretrained_bert.tokenization_bert import BasicTokenizer, whitespace_tokenize
 logger = logging.getLogger(__name__)

--- a/hubconfs/bert_hubconf.py
+++ b/hubconfs/bert_hubconf.py
-from pytorch_pretrained_bert.tokenization import BertTokenizer
+from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
-from pytorch_pretrained_bert.modeling import (
+from pytorch_pretrained_bert.modeling_bert import (
        BertModel,
        BertForNextSentencePrediction,
        BertForMaskedLM,

--- a/notebooks/Comparing-TF-and-PT-models-MLM-NSP.ipynb
+++ b/notebooks/Comparing-TF-and-PT-models-MLM-NSP.ipynb
@@ -3997,9 +3997,9 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "11/16/2018 11:03:05 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/thomaswolf/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba\n",
+      "11/16/2018 11:03:05 - INFO - pytorch_pretrained_bert.modeling_bert -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/thomaswolf/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba\n",
-      "11/16/2018 11:03:05 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /Users/thomaswolf/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmpaqgsm566\n",
+      "11/16/2018 11:03:05 - INFO - pytorch_pretrained_bert.modeling_bert -   extracting archive file /Users/thomaswolf/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmpaqgsm566\n",
-      "11/16/2018 11:03:08 - INFO - pytorch_pretrained_bert.modeling -   Model config {\n",
+      "11/16/2018 11:03:08 - INFO - pytorch_pretrained_bert.modeling_bert -   Model config {\n",
      "  \"attention_probs_dropout_prob\": 0.1,\n",
      "  \"hidden_act\": \"gelu\",\n",
      "  \"hidden_dropout_prob\": 0.1,\n",

--- a/notebooks/Comparing-TF-and-PT-models.ipynb
+++ b/notebooks/Comparing-TF-and-PT-models.ipynb
@@ -375,8 +375,8 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "11/15/2018 16:21:18 - INFO - pytorch_pretrained_bert.modeling -   loading archive file ../../google_models/uncased_L-12_H-768_A-12/\n",
+      "11/15/2018 16:21:18 - INFO - pytorch_pretrained_bert.modeling_bert -   loading archive file ../../google_models/uncased_L-12_H-768_A-12/\n",
-      "11/15/2018 16:21:18 - INFO - pytorch_pretrained_bert.modeling -   Model config {\n",
+      "11/15/2018 16:21:18 - INFO - pytorch_pretrained_bert.modeling_bert -   Model config {\n",
      "  \"attention_probs_dropout_prob\": 0.1,\n",
      "  \"hidden_act\": \"gelu\",\n",
      "  \"hidden_dropout_prob\": 0.1,\n",

--- a/pytorch_pretrained_bert/__init__.py
+++ b/pytorch_pretrained_bert/__init__.py
 __version__ = "0.6.2"
-from .tokenization import BertTokenizer, BasicTokenizer, WordpieceTokenizer
+from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer
 from .tokenization_openai import OpenAIGPTTokenizer
 from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
 from .tokenization_gpt2 import GPT2Tokenizer
 from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE
+from .tokenization_xlm import XLMTokenizer
-from .modeling import (BertConfig, BertModel, BertForPreTraining,
+from .modeling_bert import (BertConfig, BertModel, BertForPreTraining,
                       BertForMaskedLM, BertForNextSentencePrediction,
                       BertForSequenceClassification, BertForMultipleChoice,
                       BertForTokenClassification, BertForQuestionAnswering,
@@ -22,6 +23,9 @@ from .modeling_xlnet import (XLNetConfig,
                             XLNetPreTrainedModel, XLNetModel, XLNetLMHeadModel,
                             XLNetForSequenceClassification, XLNetForQuestionAnswering,
                             load_tf_weights_in_xlnet)
+from .modeling_xlm import (XLMConfig, XLMModel,
+                           XLMWithLMHeadModel, XLMForSequenceClassification,
+                           XLMForQuestionAnswering)
 from .optimization import BertAdam
 from .optimization_openai import OpenAIAdam

--- a/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py
+++ b/pytorch_pretrained_bert/convert_tf_checkpoint_to_pytorch.py
@@ -25,7 +25,7 @@ import tensorflow as tf
 import torch
 import numpy as np
-from pytorch_pretrained_bert.modeling import BertConfig, BertForPreTraining, load_tf_weights_in_bert
+from pytorch_pretrained_bert.modeling_bert import BertConfig, BertForPreTraining, load_tf_weights_in_bert
 def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
    # Initialise PyTorch model

--- a/pytorch_pretrained_bert/modeling.py
+++ b/pytorch_pretrained_bert/modeling.py
--- a/pytorch_pretrained_bert/modeling_gpt2.py
+++ b/pytorch_pretrained_bert/modeling_gpt2.py
@@ -32,7 +32,7 @@ from torch.nn.parameter import Parameter
 from .file_utils import cached_path
 from .model_utils import Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel, prune_conv1d_layer
-from .modeling import BertLayerNorm as LayerNorm
+from .modeling_bert import BertLayerNorm as LayerNorm
 logger = logging.getLogger(__name__)

--- a/pytorch_pretrained_bert/modeling_openai.py
+++ b/pytorch_pretrained_bert/modeling_openai.py
@@ -32,7 +32,7 @@ from torch.nn.parameter import Parameter
 from .file_utils import cached_path
 from .model_utils import Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel, prune_conv1d_layer
-from .modeling import BertLayerNorm as LayerNorm
+from .modeling_bert import BertLayerNorm as LayerNorm
 logger = logging.getLogger(__name__)

--- a/pytorch_pretrained_bert/modeling_transfo_xl.py
+++ b/pytorch_pretrained_bert/modeling_transfo_xl.py
@@ -34,7 +34,7 @@ import torch.nn.functional as F
 from torch.nn import CrossEntropyLoss
 from torch.nn.parameter import Parameter
-from .modeling import BertLayerNorm as LayerNorm
+from .modeling_bert import BertLayerNorm as LayerNorm
 from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax, sample_logits
 from .file_utils import cached_path
 from .model_utils import CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel

--- a/pytorch_pretrained_bert/modeling_xlm.py
+++ b/pytorch_pretrained_bert/modeling_xlm.py
--- a/pytorch_pretrained_bert/tests/model_tests_commons.py
+++ b/pytorch_pretrained_bert/tests/model_tests_commons.py
@@ -36,7 +36,9 @@ def _create_and_check_initialization(tester, model_classes, config, inputs_dict)
    for model_class in model_classes:
        model = model_class(config=configs_no_init)
        for name, param in model.named_parameters():
-            tester.parent.assertIn(param.data.mean().item(), [0.0, 1.0], msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))
+            if param.requires_grad:
+                tester.parent.assertIn(param.data.mean().item(), [0.0, 1.0],
+                                       msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))
 def _create_and_check_for_headmasking(tester, model_classes, config, inputs_dict):
    configs_no_init = _config_zero_init(config)

--- a/pytorch_pretrained_bert/tests/model_utils_test.py
+++ b/pytorch_pretrained_bert/tests/model_utils_test.py
@@ -26,7 +26,7 @@ import pytest
 import torch
 from pytorch_pretrained_bert import PretrainedConfig, PreTrainedModel
-from pytorch_pretrained_bert.modeling import BertModel, BertConfig, PRETRAINED_MODEL_ARCHIVE_MAP, PRETRAINED_CONFIG_ARCHIVE_MAP
+from pytorch_pretrained_bert.modeling_bert import BertModel, BertConfig, PRETRAINED_MODEL_ARCHIVE_MAP, PRETRAINED_CONFIG_ARCHIVE_MAP
 class ModelUtilsTest(unittest.TestCase):