Commit a75c64d8 authored by Lysandre

Black 20 release

parent e78c1103
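Context for the hunks below: Black 20.8 introduced the "magic trailing comma" (a call, definition, or literal that already ends in a trailing comma is exploded onto one element per line rather than packed to the line length) and docstring normalization (the closing quotes of a one-line docstring are pulled onto the same line). Every hunk in this commit is a mechanical consequence of those two rules. A minimal sketch reproducing both patterns, assuming black==20.8b0 is installed (black.format_str and black.FileMode are the library's public formatting entry points):

# Minimal sketch, assuming `pip install black==20.8b0`.
import black

SRC = (
    "def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2, **kwargs):\n"
    '    """Constructs RobertaConfig.\n'
    '    """\n'
    "    super().__init__(\n"
    "        pad_token_id=pad_token_id, bos_token_id=bos_token_id, **kwargs,\n"
    "    )\n"
)

# The trailing comma after **kwargs makes Black keep the call exploded,
# one argument per line; the two-line docstring collapses onto one line.
print(black.format_str(SRC, mode=black.FileMode()))

Running this pinned version over the whole repository should reproduce the hunks in this commit.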
@@ -61,6 +61,5 @@ class RobertaConfig(BertConfig):
     model_type = "roberta"
 
     def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2, **kwargs):
-        """Constructs RobertaConfig.
-        """
+        """Constructs RobertaConfig."""
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
@@ -80,7 +80,10 @@ class T5Config(PretrainedConfig):
         **kwargs
     ):
         super().__init__(
-            pad_token_id=pad_token_id, eos_token_id=eos_token_id, is_encoder_decoder=is_encoder_decoder, **kwargs,
+            pad_token_id=pad_token_id,
+            eos_token_id=eos_token_id,
+            is_encoder_decoder=is_encoder_decoder,
+            **kwargs,
         )
         self.vocab_size = vocab_size
         self.n_positions = n_positions
......
@@ -29,7 +29,7 @@ logger = logging.get_logger(__name__)
 
 
 class PretrainedConfig(object):
-    r""" Base class for all configuration classes.
+    r"""Base class for all configuration classes.
     Handles a few parameters common to all models' configurations as well as methods for loading/downloading/saving
     configurations.
 
......
@@ -191,8 +191,7 @@ class XLMConfig(PretrainedConfig):
         bos_token_id=0,
         **kwargs
     ):
-        """Constructs XLMConfig.
-        """
+        """Constructs XLMConfig."""
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, **kwargs)
         self.vocab_size = vocab_size
         self.emb_dim = emb_dim
......
@@ -162,8 +162,7 @@ class XLNetConfig(PretrainedConfig):
         eos_token_id=2,
         **kwargs
     ):
-        """Constructs XLNetConfig.
-        """
+        """Constructs XLNetConfig."""
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
         self.vocab_size = vocab_size
         self.d_model = d_model
......
@@ -27,5 +27,6 @@ if __name__ == "__main__":
     checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl")
     pytorch_dump_folder_path = f"./DialoGPT-{MODEL}"
     convert_dialogpt_checkpoint(
-        checkpoint_path, pytorch_dump_folder_path,
+        checkpoint_path,
+        pytorch_dump_folder_path,
     )
@@ -38,24 +38,39 @@ class OnnxConverterArgumentParser(ArgumentParser):
 
     def __init__(self):
         super().__init__("ONNX Converter")
         self.add_argument(
-            "--pipeline", type=str, choices=SUPPORTED_PIPELINES, default="feature-extraction",
+            "--pipeline",
+            type=str,
+            choices=SUPPORTED_PIPELINES,
+            default="feature-extraction",
         )
         self.add_argument(
-            "--model", type=str, required=True, help="Model's id or path (ex: bert-base-cased)",
+            "--model",
+            type=str,
+            required=True,
+            help="Model's id or path (ex: bert-base-cased)",
         )
         self.add_argument("--tokenizer", type=str, help="Tokenizer's id or path (ex: bert-base-cased)")
         self.add_argument(
-            "--framework", type=str, choices=["pt", "tf"], help="Framework for loading the model",
+            "--framework",
+            type=str,
+            choices=["pt", "tf"],
+            help="Framework for loading the model",
         )
         self.add_argument("--opset", type=int, default=11, help="ONNX opset to use")
         self.add_argument(
-            "--check-loading", action="store_true", help="Check ONNX is able to load the model",
+            "--check-loading",
+            action="store_true",
+            help="Check ONNX is able to load the model",
         )
         self.add_argument(
-            "--use-external-format", action="store_true", help="Allow exporting model >= than 2Gb",
+            "--use-external-format",
+            action="store_true",
+            help="Allow exporting model >= than 2Gb",
         )
         self.add_argument(
-            "--quantize", action="store_true", help="Quantize the neural network to be run with int8",
+            "--quantize",
+            action="store_true",
+            help="Quantize the neural network to be run with int8",
         )
         self.add_argument("output")
@@ -376,7 +391,10 @@ def quantize(onnx_model_path: Path) -> Path:
     )
 
     quantized_model = quantize(
-        model=onnx_model, quantization_mode=QuantizationMode.IntegerOps, force_fusions=True, symmetric_weight=True,
+        model=onnx_model,
+        quantization_mode=QuantizationMode.IntegerOps,
+        force_fusions=True,
+        symmetric_weight=True,
     )
 
     # Append "-quantized" at the end of the model's name
......
@@ -255,7 +255,11 @@ license: apache-2.0
 
 
 def write_model_card(
-    hf_model_name: str, repo_root="OPUS-MT-train", save_dir=Path("marian_converted"), dry_run=False, extra_metadata={},
+    hf_model_name: str,
+    repo_root="OPUS-MT-train",
+    save_dir=Path("marian_converted"),
+    dry_run=False,
+    extra_metadata={},
 ) -> str:
     """Copy the most recent model's readme section from opus, and add metadata.
     upload command: aws s3 sync model_card_dir s3://models.huggingface.co/bert/Helsinki-NLP/ --dryrun
@@ -604,7 +608,9 @@ class OpusState:
         assert "hidden_size" not in cfg.to_dict()
         load_layers_(
-            model.model.encoder.layers, state_dict, BART_CONVERTER,
+            model.model.encoder.layers,
+            state_dict,
+            BART_CONVERTER,
         )
         load_layers_(model.model.decoder.layers, state_dict, BART_CONVERTER, is_decoder=True)
......
@@ -108,7 +108,12 @@ if is_torch_available():
 logging.set_verbosity_info()
 
 MODEL_CLASSES = {
-    "bert": (BertConfig, TFBertForPreTraining, BertForPreTraining, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+    "bert": (
+        BertConfig,
+        TFBertForPreTraining,
+        BertForPreTraining,
+        BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
     "bert-large-uncased-whole-word-masking-finetuned-squad": (
         BertConfig,
         TFBertForQuestionAnswering,
@@ -127,9 +132,24 @@ MODEL_CLASSES = {
         BertForSequenceClassification,
         BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
     ),
-    "gpt2": (GPT2Config, TFGPT2LMHeadModel, GPT2LMHeadModel, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-    "xlnet": (XLNetConfig, TFXLNetLMHeadModel, XLNetLMHeadModel, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-    "xlm": (XLMConfig, TFXLMWithLMHeadModel, XLMWithLMHeadModel, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+    "gpt2": (
+        GPT2Config,
+        TFGPT2LMHeadModel,
+        GPT2LMHeadModel,
+        GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
+    "xlnet": (
+        XLNetConfig,
+        TFXLNetLMHeadModel,
+        XLNetLMHeadModel,
+        XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
+    "xlm": (
+        XLMConfig,
+        TFXLMWithLMHeadModel,
+        XLMWithLMHeadModel,
+        XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
     "xlm-roberta": (
         XLMRobertaConfig,
         TFXLMRobertaForMaskedLM,
@@ -148,7 +168,12 @@ MODEL_CLASSES = {
         OpenAIGPTLMHeadModel,
         OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
     ),
-    "roberta": (RobertaConfig, TFRobertaForMaskedLM, RobertaForMaskedLM, ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+    "roberta": (
+        RobertaConfig,
+        TFRobertaForMaskedLM,
+        RobertaForMaskedLM,
+        ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
     "roberta-large-mnli": (
         RobertaConfig,
         TFRobertaForSequenceClassification,
@@ -179,10 +204,30 @@ MODEL_CLASSES = {
         DistilBertForQuestionAnswering,
         DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
     ),
-    "ctrl": (CTRLConfig, TFCTRLLMHeadModel, CTRLLMHeadModel, CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-    "albert": (AlbertConfig, TFAlbertForPreTraining, AlbertForPreTraining, ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-    "t5": (T5Config, TFT5ForConditionalGeneration, T5ForConditionalGeneration, T5_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-    "electra": (ElectraConfig, TFElectraForPreTraining, ElectraForPreTraining, ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+    "ctrl": (
+        CTRLConfig,
+        TFCTRLLMHeadModel,
+        CTRLLMHeadModel,
+        CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
+    "albert": (
+        AlbertConfig,
+        TFAlbertForPreTraining,
+        AlbertForPreTraining,
+        ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
+    "t5": (
+        T5Config,
+        TFT5ForConditionalGeneration,
+        T5ForConditionalGeneration,
+        T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
+    "electra": (
+        ElectraConfig,
+        TFElectraForPreTraining,
+        ElectraForPreTraining,
+        ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
 }
......
@@ -49,10 +49,12 @@ def set_layer_weights_in_torch_lsh(weights, torch_layer, hidden_size):
         torch.tensor(np_query_key).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.self_attention.value, torch.tensor(np_value).transpose(1, 2).contiguous().view(-1, hidden_size),
+        torch_layer.self_attention.value,
+        torch.tensor(np_value).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.output.dense, torch.tensor(np_dense).view(-1, hidden_size).contiguous().transpose(0, 1),
+        torch_layer.output.dense,
+        torch.tensor(np_dense).view(-1, hidden_size).contiguous().transpose(0, 1),
     )
@@ -64,16 +66,20 @@ def set_layer_weights_in_torch_local(weights, torch_layer, hidden_size):
     np_dense = np.asarray(weights[3])
 
     set_param(
-        torch_layer.self_attention.query, torch.tensor(np_query).transpose(1, 2).contiguous().view(-1, hidden_size),
+        torch_layer.self_attention.query,
+        torch.tensor(np_query).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.self_attention.key, torch.tensor(np_key).transpose(1, 2).contiguous().view(-1, hidden_size),
+        torch_layer.self_attention.key,
+        torch.tensor(np_key).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.self_attention.value, torch.tensor(np_value).transpose(1, 2).contiguous().view(-1, hidden_size),
+        torch_layer.self_attention.value,
+        torch.tensor(np_value).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.output.dense, torch.tensor(np_dense).view(-1, hidden_size).contiguous().transpose(0, 1),
+        torch_layer.output.dense,
+        torch.tensor(np_dense).view(-1, hidden_size).contiguous().transpose(0, 1),
     )
@@ -83,7 +89,9 @@ def set_block_weights_in_torch(weights, torch_block, hidden_size):
     layer_norm_1_weight = np.asarray(layer_norm_1[0])
     layer_norm_1_bias = np.asarray(layer_norm_1[1])
     set_param(
-        torch_block.attention.layer_norm, torch.tensor(layer_norm_1_weight), torch.tensor(layer_norm_1_bias),
+        torch_block.attention.layer_norm,
+        torch.tensor(layer_norm_1_weight),
+        torch.tensor(layer_norm_1_bias),
     )
 
     # lsh weights + output
@@ -104,7 +112,9 @@ def set_block_weights_in_torch(weights, torch_block, hidden_size):
     layer_norm_2_weight = np.asarray(intermediate_weights[0][0])
     layer_norm_2_bias = np.asarray(intermediate_weights[0][1])
     set_param(
-        torch_block.feed_forward.layer_norm, torch.tensor(layer_norm_2_weight), torch.tensor(layer_norm_2_bias),
+        torch_block.feed_forward.layer_norm,
+        torch.tensor(layer_norm_2_weight),
+        torch.tensor(layer_norm_2_bias),
     )
 
     # intermediate dense
@@ -133,7 +143,8 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size):
     # word embeds
     word_embeddings = np.asarray(weights[1])
     set_param(
-        torch_model_reformer.embeddings.word_embeddings, torch.tensor(word_embeddings),
+        torch_model_reformer.embeddings.word_embeddings,
+        torch.tensor(word_embeddings),
     )
 
     if isinstance(weights[3], tuple):
......
@@ -86,7 +86,10 @@ class GlueDataset(Dataset):
         cached_features_file = os.path.join(
             cache_dir if cache_dir is not None else args.data_dir,
             "cached_{}_{}_{}_{}".format(
-                mode.value, tokenizer.__class__.__name__, str(args.max_seq_length), args.task_name,
+                mode.value,
+                tokenizer.__class__.__name__,
+                str(args.max_seq_length),
+                args.task_name,
             ),
         )
         label_list = self.processor.get_labels()
......
@@ -21,7 +21,11 @@ class TextDataset(Dataset):
     """
 
     def __init__(
-        self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, overwrite_cache=False,
+        self,
+        tokenizer: PreTrainedTokenizer,
+        file_path: str,
+        block_size: int,
+        overwrite_cache=False,
     ):
         assert os.path.isfile(file_path), f"Input file path {file_path} not found"
 
@@ -29,7 +33,12 @@ class TextDataset(Dataset):
 
         directory, filename = os.path.split(file_path)
         cached_features_file = os.path.join(
-            directory, "cached_lm_{}_{}_{}".format(tokenizer.__class__.__name__, str(block_size), filename,),
+            directory,
+            "cached_lm_{}_{}_{}".format(
+                tokenizer.__class__.__name__,
+                str(block_size),
+                filename,
+            ),
         )
 
         # Make sure only the first process in distributed training processes the dataset,
......
@@ -119,7 +119,10 @@ class SquadDataset(Dataset):
         cached_features_file = os.path.join(
             cache_dir if cache_dir is not None else args.data_dir,
             "cached_{}_{}_{}_{}".format(
-                mode.value, tokenizer.__class__.__name__, str(args.max_seq_length), version_tag,
+                mode.value,
+                tokenizer.__class__.__name__,
+                str(args.max_seq_length),
+                version_tag,
             ),
         )
 
......
@@ -589,7 +589,7 @@ def compute_predictions_log_probs(
     tokenizer,
     verbose_logging,
 ):
-    """ XLNet write prediction logic (more complex than Bert's).
+    """XLNet write prediction logic (more complex than Bert's).
     Write final predictions to the json file and log-odds of null if needed.
 
     Requires utils_squad_evaluate.py
......
@@ -69,7 +69,10 @@ def glue_convert_examples_to_features(
 if is_tf_available():
 
     def _tf_glue_convert_examples_to_features(
-        examples: tf.data.Dataset, tokenizer: PreTrainedTokenizer, task=str, max_length: Optional[int] = None,
+        examples: tf.data.Dataset,
+        tokenizer: PreTrainedTokenizer,
+        task=str,
+        max_length: Optional[int] = None,
     ) -> tf.data.Dataset:
         """
         Returns:
......
@@ -269,7 +269,9 @@ class SingleSentenceClassificationProcessor(DataProcessor):
                 logger.info("Tokenizing example %d", ex_index)
 
             input_ids = tokenizer.encode(
-                example.text_a, add_special_tokens=True, max_length=min(max_length, tokenizer.max_len),
+                example.text_a,
+                add_special_tokens=True,
+                max_length=min(max_length, tokenizer.max_len),
             )
             all_input_ids.append(input_ids)
 
......