"benchmark/git@developer.sourcefind.cn:change/sglang.git" did not exist on "0f52fb55ecd992f856892dc9fdec281e467e1ca9"
Commit c41f2bad authored by thomwolf's avatar thomwolf
Browse files

WIP XLM + refactoring

parent 288be7b7
...@@ -14,8 +14,8 @@ from torch.utils.data.distributed import DistributedSampler ...@@ -14,8 +14,8 @@ from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm from tqdm import tqdm
from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
from pytorch_pretrained_bert.modeling import BertForPreTraining from pytorch_pretrained_bert.modeling_bert import BertForPreTraining
from pytorch_pretrained_bert.tokenization import BertTokenizer from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
InputFeatures = namedtuple("InputFeatures", "input_ids input_mask segment_ids lm_label_ids is_next") InputFeatures = namedtuple("InputFeatures", "input_ids input_mask segment_ids lm_label_ids is_next")
......
...@@ -5,7 +5,7 @@ from tempfile import TemporaryDirectory ...@@ -5,7 +5,7 @@ from tempfile import TemporaryDirectory
import shelve import shelve
from random import random, randrange, randint, shuffle, choice from random import random, randrange, randint, shuffle, choice
from pytorch_pretrained_bert.tokenization import BertTokenizer from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
import numpy as np import numpy as np
import json import json
import collections import collections
......
...@@ -30,8 +30,8 @@ from torch.utils.data.distributed import DistributedSampler ...@@ -30,8 +30,8 @@ from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange from tqdm import tqdm, trange
from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
from pytorch_pretrained_bert.modeling import BertForPreTraining from pytorch_pretrained_bert.modeling_bert import BertForPreTraining
from pytorch_pretrained_bert.tokenization import BertTokenizer from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
......
...@@ -35,8 +35,8 @@ from torch.nn import CrossEntropyLoss, MSELoss ...@@ -35,8 +35,8 @@ from torch.nn import CrossEntropyLoss, MSELoss
from tensorboardX import SummaryWriter from tensorboardX import SummaryWriter
from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
from pytorch_pretrained_bert.modeling import BertForSequenceClassification from pytorch_pretrained_bert.modeling_bert import BertForSequenceClassification
from pytorch_pretrained_bert.tokenization import BertTokenizer from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
from utils_glue import processors, output_modes, convert_examples_to_features, compute_metrics from utils_glue import processors, output_modes, convert_examples_to_features, compute_metrics
......
...@@ -28,8 +28,8 @@ import torch ...@@ -28,8 +28,8 @@ import torch
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from torch.utils.data.distributed import DistributedSampler from torch.utils.data.distributed import DistributedSampler
from pytorch_pretrained_bert.tokenization import BertTokenizer from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
from pytorch_pretrained_bert.modeling import BertModel from pytorch_pretrained_bert.modeling_bert import BertModel
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
datefmt = '%m/%d/%Y %H:%M:%S', datefmt = '%m/%d/%Y %H:%M:%S',
......
...@@ -34,9 +34,9 @@ from tqdm import tqdm, trange ...@@ -34,9 +34,9 @@ from tqdm import tqdm, trange
from tensorboardX import SummaryWriter from tensorboardX import SummaryWriter
from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
from pytorch_pretrained_bert.modeling import BertForQuestionAnswering from pytorch_pretrained_bert.modeling_bert import BertForQuestionAnswering
from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
from pytorch_pretrained_bert.tokenization import BertTokenizer from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
from utils_squad import read_squad_examples, convert_examples_to_features, RawResult, write_predictions from utils_squad import read_squad_examples, convert_examples_to_features, RawResult, write_predictions
......
...@@ -33,9 +33,9 @@ from torch.utils.data.distributed import DistributedSampler ...@@ -33,9 +33,9 @@ from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange from tqdm import tqdm, trange
from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
from pytorch_pretrained_bert.modeling import BertForMultipleChoice, BertConfig from pytorch_pretrained_bert.modeling_bert import BertForMultipleChoice, BertConfig
from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
from pytorch_pretrained_bert.tokenization import BertTokenizer from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
datefmt = '%m/%d/%Y %H:%M:%S', datefmt = '%m/%d/%Y %H:%M:%S',
......
...@@ -24,7 +24,7 @@ import math ...@@ -24,7 +24,7 @@ import math
import collections import collections
from io import open from io import open
from pytorch_pretrained_bert.tokenization import BasicTokenizer, whitespace_tokenize from pytorch_pretrained_bert.tokenization_bert import BasicTokenizer, whitespace_tokenize
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
from pytorch_pretrained_bert.tokenization import BertTokenizer from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
from pytorch_pretrained_bert.modeling import ( from pytorch_pretrained_bert.modeling_bert import (
BertModel, BertModel,
BertForNextSentencePrediction, BertForNextSentencePrediction,
BertForMaskedLM, BertForMaskedLM,
......
...@@ -3997,9 +3997,9 @@ ...@@ -3997,9 +3997,9 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"11/16/2018 11:03:05 - INFO - pytorch_pretrained_bert.modeling - loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/thomaswolf/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba\n", "11/16/2018 11:03:05 - INFO - pytorch_pretrained_bert.modeling_bert - loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/thomaswolf/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba\n",
"11/16/2018 11:03:05 - INFO - pytorch_pretrained_bert.modeling - extracting archive file /Users/thomaswolf/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmpaqgsm566\n", "11/16/2018 11:03:05 - INFO - pytorch_pretrained_bert.modeling_bert - extracting archive file /Users/thomaswolf/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmpaqgsm566\n",
"11/16/2018 11:03:08 - INFO - pytorch_pretrained_bert.modeling - Model config {\n", "11/16/2018 11:03:08 - INFO - pytorch_pretrained_bert.modeling_bert - Model config {\n",
" \"attention_probs_dropout_prob\": 0.1,\n", " \"attention_probs_dropout_prob\": 0.1,\n",
" \"hidden_act\": \"gelu\",\n", " \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n", " \"hidden_dropout_prob\": 0.1,\n",
......
...@@ -375,8 +375,8 @@ ...@@ -375,8 +375,8 @@
"name": "stderr", "name": "stderr",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"11/15/2018 16:21:18 - INFO - pytorch_pretrained_bert.modeling - loading archive file ../../google_models/uncased_L-12_H-768_A-12/\n", "11/15/2018 16:21:18 - INFO - pytorch_pretrained_bert.modeling_bert - loading archive file ../../google_models/uncased_L-12_H-768_A-12/\n",
"11/15/2018 16:21:18 - INFO - pytorch_pretrained_bert.modeling - Model config {\n", "11/15/2018 16:21:18 - INFO - pytorch_pretrained_bert.modeling_bert - Model config {\n",
" \"attention_probs_dropout_prob\": 0.1,\n", " \"attention_probs_dropout_prob\": 0.1,\n",
" \"hidden_act\": \"gelu\",\n", " \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n", " \"hidden_dropout_prob\": 0.1,\n",
......
__version__ = "0.6.2" __version__ = "0.6.2"
from .tokenization import BertTokenizer, BasicTokenizer, WordpieceTokenizer from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer
from .tokenization_openai import OpenAIGPTTokenizer from .tokenization_openai import OpenAIGPTTokenizer
from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus) from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
from .tokenization_gpt2 import GPT2Tokenizer from .tokenization_gpt2 import GPT2Tokenizer
from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE
from .tokenization_xlm import XLMTokenizer
from .modeling import (BertConfig, BertModel, BertForPreTraining, from .modeling_bert import (BertConfig, BertModel, BertForPreTraining,
BertForMaskedLM, BertForNextSentencePrediction, BertForMaskedLM, BertForNextSentencePrediction,
BertForSequenceClassification, BertForMultipleChoice, BertForSequenceClassification, BertForMultipleChoice,
BertForTokenClassification, BertForQuestionAnswering, BertForTokenClassification, BertForQuestionAnswering,
...@@ -22,6 +23,9 @@ from .modeling_xlnet import (XLNetConfig, ...@@ -22,6 +23,9 @@ from .modeling_xlnet import (XLNetConfig,
XLNetPreTrainedModel, XLNetModel, XLNetLMHeadModel, XLNetPreTrainedModel, XLNetModel, XLNetLMHeadModel,
XLNetForSequenceClassification, XLNetForQuestionAnswering, XLNetForSequenceClassification, XLNetForQuestionAnswering,
load_tf_weights_in_xlnet) load_tf_weights_in_xlnet)
from .modeling_xlm import (XLMConfig, XLMModel,
XLMWithLMHeadModel, XLMForSequenceClassification,
XLMForQuestionAnswering)
from .optimization import BertAdam from .optimization import BertAdam
from .optimization_openai import OpenAIAdam from .optimization_openai import OpenAIAdam
......
...@@ -25,7 +25,7 @@ import tensorflow as tf ...@@ -25,7 +25,7 @@ import tensorflow as tf
import torch import torch
import numpy as np import numpy as np
from pytorch_pretrained_bert.modeling import BertConfig, BertForPreTraining, load_tf_weights_in_bert from pytorch_pretrained_bert.modeling_bert import BertConfig, BertForPreTraining, load_tf_weights_in_bert
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path): def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
# Initialise PyTorch model # Initialise PyTorch model
......
...@@ -32,7 +32,7 @@ from torch.nn.parameter import Parameter ...@@ -32,7 +32,7 @@ from torch.nn.parameter import Parameter
from .file_utils import cached_path from .file_utils import cached_path
from .model_utils import Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel, prune_conv1d_layer from .model_utils import Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel, prune_conv1d_layer
from .modeling import BertLayerNorm as LayerNorm from .modeling_bert import BertLayerNorm as LayerNorm
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
...@@ -32,7 +32,7 @@ from torch.nn.parameter import Parameter ...@@ -32,7 +32,7 @@ from torch.nn.parameter import Parameter
from .file_utils import cached_path from .file_utils import cached_path
from .model_utils import Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel, prune_conv1d_layer from .model_utils import Conv1D, CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel, prune_conv1d_layer
from .modeling import BertLayerNorm as LayerNorm from .modeling_bert import BertLayerNorm as LayerNorm
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
...@@ -34,7 +34,7 @@ import torch.nn.functional as F ...@@ -34,7 +34,7 @@ import torch.nn.functional as F
from torch.nn import CrossEntropyLoss from torch.nn import CrossEntropyLoss
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
from .modeling import BertLayerNorm as LayerNorm from .modeling_bert import BertLayerNorm as LayerNorm
from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax, sample_logits from .modeling_transfo_xl_utilities import ProjectedAdaptiveLogSoftmax, sample_logits
from .file_utils import cached_path from .file_utils import cached_path
from .model_utils import CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel from .model_utils import CONFIG_NAME, WEIGHTS_NAME, PretrainedConfig, PreTrainedModel
......
This diff is collapsed.
...@@ -36,7 +36,9 @@ def _create_and_check_initialization(tester, model_classes, config, inputs_dict) ...@@ -36,7 +36,9 @@ def _create_and_check_initialization(tester, model_classes, config, inputs_dict)
for model_class in model_classes: for model_class in model_classes:
model = model_class(config=configs_no_init) model = model_class(config=configs_no_init)
for name, param in model.named_parameters(): for name, param in model.named_parameters():
tester.parent.assertIn(param.data.mean().item(), [0.0, 1.0], msg="Parameter {} of model {} seems not properly initialized".format(name, model_class)) if param.requires_grad:
tester.parent.assertIn(param.data.mean().item(), [0.0, 1.0],
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))
def _create_and_check_for_headmasking(tester, model_classes, config, inputs_dict): def _create_and_check_for_headmasking(tester, model_classes, config, inputs_dict):
configs_no_init = _config_zero_init(config) configs_no_init = _config_zero_init(config)
......
...@@ -26,7 +26,7 @@ import pytest ...@@ -26,7 +26,7 @@ import pytest
import torch import torch
from pytorch_pretrained_bert import PretrainedConfig, PreTrainedModel from pytorch_pretrained_bert import PretrainedConfig, PreTrainedModel
from pytorch_pretrained_bert.modeling import BertModel, BertConfig, PRETRAINED_MODEL_ARCHIVE_MAP, PRETRAINED_CONFIG_ARCHIVE_MAP from pytorch_pretrained_bert.modeling_bert import BertModel, BertConfig, PRETRAINED_MODEL_ARCHIVE_MAP, PRETRAINED_CONFIG_ARCHIVE_MAP
class ModelUtilsTest(unittest.TestCase): class ModelUtilsTest(unittest.TestCase):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment