Commit e468192e authored by thomwolf

Merge branch 'pytorch-transformers' into xlnet

parents 9dd2c860 4ce237c8
version: 2
jobs:
  build_py3:
-   working_directory: ~/pytorch-pretrained-BERT
+   working_directory: ~/pytorch-transformers
    docker:
      - image: circleci/python:3.5
    steps:
@@ -10,11 +10,10 @@ jobs:
      - run: sudo pip install pytest codecov pytest-cov
      - run: sudo pip install spacy ftfy==4.4.3
      - run: sudo python -m spacy download en
-     - run: python -m pytest -sv ./pytorch_pretrained_bert/tests/ --cov
+     - run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
      - run: codecov
-   parallelism: 4
  build_py2:
-   working_directory: ~/pytorch-pretrained-BERT
+   working_directory: ~/pytorch-transformers
    docker:
      - image: circleci/python:2.7
    steps:
@@ -23,9 +22,8 @@ jobs:
      - run: sudo pip install pytest codecov pytest-cov
      - run: sudo pip install spacy ftfy==4.4.3
      - run: sudo python -m spacy download en
-     - run: python -m pytest -sv ./pytorch_pretrained_bert/tests/ --cov
+     - run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
      - run: codecov
-   parallelism: 4
workflows:
  version: 2
  build_and_test:
......
[run]
-source=pytorch_pretrained_bert
+source=pytorch_transformers
+omit =
+    # skip conversion scripts from testing for now
+    */convert_*
+    */__main__.py
[report]
exclude_lines =
    pragma: no cover
......
@@ -126,4 +126,5 @@ models
proc_data
# examples
+runs
examples/runs
\ No newline at end of file
(a larger file diff is collapsed here)
@@ -2,6 +2,6 @@ FROM pytorch/pytorch:latest
RUN git clone https://github.com/NVIDIA/apex.git && cd apex && python setup.py install --cuda_ext --cpp_ext
-RUN pip install pytorch-pretrained-bert
+RUN pip install pytorch_transformers
WORKDIR /workspace
\ No newline at end of file
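The Dockerfile now installs the renamed package. As a usage reference, a quick sanity check one might run inside the container (a minimal sketch; the checkpoint name `bert-base-uncased` and the printed value are illustrative, not part of this commit):

```python
# check_install.py - verify the renamed package imports and loads weights
from pytorch_transformers import BertModel, BertTokenizer

# downloads and caches pretrained weights on first use (assumed checkpoint name)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')
print(model.config.hidden_size)  # 768 for the base model
```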
@@ -12,7 +12,7 @@ from torch.utils.data import DataLoader, SequentialSampler, TensorDataset, Subse
from torch.utils.data.distributed import DistributedSampler
from torch.nn import CrossEntropyLoss, MSELoss

-from pytorch_pretrained_bert import BertForSequenceClassification, BertTokenizer
+from pytorch_transformers import BertForSequenceClassification, BertTokenizer

from utils_glue import processors, output_modes, convert_examples_to_features, compute_metrics
......
import torch
from torch.nn import functional as F

-from pytorch_pretrained_bert import XLNetModel, XLNetLMHeadModel, XLNetTokenizer
+from pytorch_transformers import XLNetModel, XLNetLMHeadModel, XLNetTokenizer

import logging
logging.basicConfig(level=logging.INFO)
......
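For context, a minimal sketch of these XLNet classes under the new package name (the checkpoint name and output layout are assumptions, not taken from this diff):

```python
import torch
from pytorch_transformers import XLNetLMHeadModel, XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')  # assumed checkpoint name
model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased')
model.eval()

input_ids = torch.tensor([tokenizer.encode("Hello, my dog is cute")])
with torch.no_grad():
    logits = model(input_ids)[0]  # first tuple element holds the LM logits
print(logits.shape)  # (batch, sequence_length, vocab_size)
```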
@@ -13,10 +13,10 @@ from torch.utils.data import DataLoader, Dataset, RandomSampler
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm

-from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
-from pytorch_pretrained_bert.modeling_bert import BertForPreTraining
-from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
+from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME
+from pytorch_transformers.modeling_bert import BertForPreTraining
+from pytorch_transformers.tokenization_bert import BertTokenizer
+from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule

InputFeatures = namedtuple("InputFeatures", "input_ids input_mask segment_ids lm_label_ids is_next")
......
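These examples keep the pre-1.0 optimizer API. A minimal sketch of how `BertAdam` is typically wired up with linear warmup (all hyperparameter values and the checkpoint name are illustrative assumptions):

```python
from pytorch_transformers.modeling_bert import BertForPreTraining
from pytorch_transformers.optimization import BertAdam

model = BertForPreTraining.from_pretrained('bert-base-uncased')  # assumed checkpoint

num_train_optimization_steps = 10000  # illustrative value
optimizer = BertAdam(model.parameters(),
                     lr=3e-5,     # illustrative learning rate
                     warmup=0.1,  # fraction of steps spent warming up
                     t_total=num_train_optimization_steps)
```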
@@ -5,7 +5,7 @@ from tempfile import TemporaryDirectory
import shelve
from random import random, randrange, randint, shuffle, choice

-from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
+from pytorch_transformers.tokenization_bert import BertTokenizer

import numpy as np
import json
import collections
......
@@ -29,10 +29,10 @@ from torch.utils.data import DataLoader, Dataset, RandomSampler
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange

-from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
-from pytorch_pretrained_bert.modeling_bert import BertForPreTraining
-from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
+from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME
+from pytorch_transformers.modeling_bert import BertForPreTraining
+from pytorch_transformers.tokenization_bert import BertTokenizer
+from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule

logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt='%m/%d/%Y %H:%M:%S',
......
@@ -34,10 +34,10 @@ from torch.nn import CrossEntropyLoss, MSELoss
from tensorboardX import SummaryWriter

-from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
-from pytorch_pretrained_bert.modeling_bert import BertForSequenceClassification
-from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
+from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME
+from pytorch_transformers.modeling_bert import BertForSequenceClassification
+from pytorch_transformers.tokenization_bert import BertTokenizer
+from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule

from utils_glue import processors, output_modes, convert_examples_to_features, compute_metrics

@@ -308,14 +308,8 @@ def main():
                input_ids, input_mask, segment_ids, label_ids = batch

                # define a new function to compute loss values for both output_modes
-               logits = model(input_ids, token_type_ids=segment_ids, attention_mask=input_mask)
-               if output_mode == "classification":
-                   loss_fct = CrossEntropyLoss()
-                   loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1))
-               elif output_mode == "regression":
-                   loss_fct = MSELoss()
-                   loss = loss_fct(logits.view(-1), label_ids.view(-1))
+               outputs = model(input_ids, token_type_ids=segment_ids, attention_mask=input_mask, labels=label_ids)
+               loss = outputs[0]

                if n_gpu > 1:
                    loss = loss.mean() # mean() to average on multi-gpu.
@@ -422,15 +416,8 @@ def main():
                label_ids = label_ids.to(device)

                with torch.no_grad():
-                   logits = model(input_ids, token_type_ids=segment_ids, attention_mask=input_mask)
-
-               # create eval loss and other metric required by the task
-               if output_mode == "classification":
-                   loss_fct = CrossEntropyLoss()
-                   tmp_eval_loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1))
-               elif output_mode == "regression":
-                   loss_fct = MSELoss()
-                   tmp_eval_loss = loss_fct(logits.view(-1), label_ids.view(-1))
+                   outputs = model(input_ids, token_type_ids=segment_ids, attention_mask=input_mask, labels=label_ids)
+                   tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
                nb_eval_steps += 1
......
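The two hunks above move loss computation into the model: passing `labels=` makes the forward call return the loss as the first tuple element, followed by the logits, which replaces the manual `CrossEntropyLoss`/`MSELoss` branches. A minimal sketch of that contract (checkpoint name, `num_labels`, and the inputs are illustrative assumptions):

```python
import torch
from pytorch_transformers import BertForSequenceClassification, BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')  # assumed checkpoint
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

input_ids = torch.tensor([tokenizer.encode("a sentence to classify")])
labels = torch.tensor([1])

# with labels:    outputs = (loss, logits, ...)
# without labels: outputs = (logits, ...)
loss, logits = model(input_ids, labels=labels)[:2]
```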
@@ -28,8 +28,8 @@ import torch
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from torch.utils.data.distributed import DistributedSampler

-from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
-from pytorch_pretrained_bert.modeling_bert import BertModel
+from pytorch_transformers.tokenization_bert import BertTokenizer
+from pytorch_transformers.modeling_bert import BertModel

logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
......
@@ -33,10 +33,10 @@ from tqdm import tqdm, trange
from tensorboardX import SummaryWriter

-from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
-from pytorch_pretrained_bert.modeling_bert import BertForQuestionAnswering
-from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
-from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
+from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME
+from pytorch_transformers.modeling_bert import BertForQuestionAnswering
+from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule
+from pytorch_transformers.tokenization_bert import BertTokenizer

from utils_squad import read_squad_examples, convert_examples_to_features, RawResult, write_predictions
......
@@ -32,10 +32,10 @@ from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange

-from pytorch_pretrained_bert.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
-from pytorch_pretrained_bert.modeling_bert import BertForMultipleChoice, BertConfig
-from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
-from pytorch_pretrained_bert.tokenization_bert import BertTokenizer
+from pytorch_transformers.file_utils import PYTORCH_PRETRAINED_BERT_CACHE, WEIGHTS_NAME, CONFIG_NAME
+from pytorch_transformers.modeling_bert import BertForMultipleChoice, BertConfig
+from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule
+from pytorch_transformers.tokenization_bert import BertTokenizer

logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
......
(a larger file diff is collapsed here)
@@ -8,7 +8,7 @@ import torch
import torch.nn.functional as F
import numpy as np

-from pytorch_pretrained_bert import GPT2LMHeadModel, GPT2Tokenizer
+from pytorch_transformers import GPT2LMHeadModel, GPT2Tokenizer

logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
......
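As a usage reference for the renamed GPT-2 classes, a short sampling sketch (checkpoint name, prompt, and the sampling scheme are assumptions, not taken from this diff):

```python
import torch
import torch.nn.functional as F
from pytorch_transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')  # assumed checkpoint name
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.eval()

tokens = tokenizer.encode("The quick brown fox")
for _ in range(20):
    with torch.no_grad():
        logits = model(torch.tensor([tokens]))[0]   # (batch, seq_len, vocab)
    probs = F.softmax(logits[0, -1], dim=-1)        # distribution over the next token
    tokens.append(torch.multinomial(probs, 1).item())
print(tokenizer.decode(tokens))
```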
@@ -39,7 +39,7 @@ import torch
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset)

-from pytorch_pretrained_bert import (OpenAIGPTDoubleHeadsModel, OpenAIGPTTokenizer,
+from pytorch_transformers import (OpenAIGPTDoubleHeadsModel, OpenAIGPTTokenizer,
                                  OpenAIAdam, cached_path, WEIGHTS_NAME, CONFIG_NAME)

ROCSTORIES_URL = "https://s3.amazonaws.com/datasets.huggingface.co/ROCStories.tar.gz"
......
(a larger file diff is collapsed here)
@@ -28,7 +28,7 @@ import math
import torch

-from pytorch_pretrained_bert import TransfoXLLMHeadModel, TransfoXLCorpus, TransfoXLTokenizer
+from pytorch_transformers import TransfoXLLMHeadModel, TransfoXLCorpus, TransfoXLTokenizer

logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
......
@@ -34,10 +34,10 @@ from torch.nn import CrossEntropyLoss, MSELoss
from tensorboardX import SummaryWriter

-from pytorch_pretrained_bert import WEIGHTS_NAME, CONFIG_NAME
-from pytorch_pretrained_bert.modeling_xlnet import XLNetForSequenceClassification
-from pytorch_pretrained_bert.tokenization_xlnet import XLNetTokenizer
-from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
+from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME
+from pytorch_transformers.modeling_xlnet import XLNetForSequenceClassification
+from pytorch_transformers.tokenization_xlnet import XLNetTokenizer
+from pytorch_transformers.optimization import BertAdam, WarmupLinearSchedule

from utils_glue import processors, output_modes, convert_examples_to_features, compute_metrics

@@ -211,8 +211,8 @@ def main():
            logger.info("No cache file at %s, preparing train features", cached_train_features_file)
            train_features = convert_examples_to_features(
                train_examples, label_list, args.max_seq_length, tokenizer, output_mode,
-               cls_token_at_end=True, cls_token=tokenizer.CLS_TOKEN,
-               sep_token=tokenizer.SEP_TOKEN, cls_token_segment_id=2,
+               cls_token_at_end=True, cls_token=tokenizer.cls_token,
+               sep_token=tokenizer.sep_token, cls_token_segment_id=2,
                pad_on_left=True, pad_token_segment_id=4)
            if args.local_rank == -1 or torch.distributed.get_rank() == 0:
                logger.info(" Saving train features into cached file %s", cached_train_features_file)
@@ -369,8 +369,8 @@ def main():
            logger.info("No cache file at %s, preparing eval features", cached_eval_features_file)
            eval_features = convert_examples_to_features(
                eval_examples, label_list, args.max_seq_length, tokenizer, output_mode,
-               cls_token_at_end=True, cls_token=tokenizer.CLS_TOKEN,
-               sep_token=tokenizer.SEP_TOKEN, cls_token_segment_id=2,
+               cls_token_at_end=True, cls_token=tokenizer.cls_token,
+               sep_token=tokenizer.sep_token, cls_token_segment_id=2,
                pad_on_left=True, pad_token_segment_id=4)
            if args.local_rank == -1 or torch.distributed.get_rank() == 0:
                logger.info(" Saving eval features into cached file %s", cached_eval_features_file)
......
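The last two hunks track the tokenizer API change from uppercase constants (`CLS_TOKEN`, `SEP_TOKEN`) to lowercase properties. A quick sketch of the new attribute access (the checkpoint name and printed values are assumptions):

```python
from pytorch_transformers import XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')  # assumed checkpoint

# special tokens are now exposed as lowercase properties
print(tokenizer.cls_token)  # e.g. '<cls>'
print(tokenizer.sep_token)  # e.g. '<sep>'
```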