Commit a75c64d8 authored by Lysandre

Black 20 release

parent e78c1103
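
The reformatting below is consistent with re-running Black after the 20.x release: Black 20.8b0 introduced the "magic trailing comma", so a call, signature, or literal that already ends in a trailing comma is split to one element per line even when it would fit within the configured line length. A minimal illustrative sketch of the rule, reusing the convert_dialogpt_checkpoint call from this diff:

    # Before (older Black): the trailing comma is tolerated and the arguments
    # may share a wrapped line as long as they fit.
    convert_dialogpt_checkpoint(
        checkpoint_path, pytorch_dump_folder_path,
    )

    # After (Black >= 20.8b0): the pre-existing trailing comma is "magic" and
    # forces one argument per line.
    convert_dialogpt_checkpoint(
        checkpoint_path,
        pytorch_dump_folder_path,
    )
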
@@ -61,6 +61,5 @@ class RobertaConfig(BertConfig):
     model_type = "roberta"

     def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2, **kwargs):
-        """Constructs RobertaConfig.
-        """
+        """Constructs RobertaConfig."""
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
@@ -80,7 +80,10 @@ class T5Config(PretrainedConfig):
         **kwargs
     ):
         super().__init__(
-            pad_token_id=pad_token_id, eos_token_id=eos_token_id, is_encoder_decoder=is_encoder_decoder, **kwargs,
+            pad_token_id=pad_token_id,
+            eos_token_id=eos_token_id,
+            is_encoder_decoder=is_encoder_decoder,
+            **kwargs,
         )
         self.vocab_size = vocab_size
         self.n_positions = n_positions
...
@@ -29,7 +29,7 @@ logger = logging.get_logger(__name__)
 class PretrainedConfig(object):
-    r""" Base class for all configuration classes.
+    r"""Base class for all configuration classes.
     Handles a few parameters common to all models' configurations as well as methods for loading/downloading/saving
     configurations.
...
@@ -191,8 +191,7 @@ class XLMConfig(PretrainedConfig):
         bos_token_id=0,
         **kwargs
     ):
-        """Constructs XLMConfig.
-        """
+        """Constructs XLMConfig."""
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, **kwargs)
         self.vocab_size = vocab_size
         self.emb_dim = emb_dim
...
@@ -162,8 +162,7 @@ class XLNetConfig(PretrainedConfig):
         eos_token_id=2,
         **kwargs
     ):
-        """Constructs XLNetConfig.
-        """
+        """Constructs XLNetConfig."""
         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
         self.vocab_size = vocab_size
         self.d_model = d_model
...
...@@ -27,5 +27,6 @@ if __name__ == "__main__": ...@@ -27,5 +27,6 @@ if __name__ == "__main__":
checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl") checkpoint_path = os.path.join(args.dialogpt_path, f"{MODEL}_ft.pkl")
pytorch_dump_folder_path = f"./DialoGPT-{MODEL}" pytorch_dump_folder_path = f"./DialoGPT-{MODEL}"
convert_dialogpt_checkpoint( convert_dialogpt_checkpoint(
checkpoint_path, pytorch_dump_folder_path, checkpoint_path,
pytorch_dump_folder_path,
) )
@@ -38,24 +38,39 @@ class OnnxConverterArgumentParser(ArgumentParser):
         super().__init__("ONNX Converter")
         self.add_argument(
-            "--pipeline", type=str, choices=SUPPORTED_PIPELINES, default="feature-extraction",
+            "--pipeline",
+            type=str,
+            choices=SUPPORTED_PIPELINES,
+            default="feature-extraction",
         )
         self.add_argument(
-            "--model", type=str, required=True, help="Model's id or path (ex: bert-base-cased)",
+            "--model",
+            type=str,
+            required=True,
+            help="Model's id or path (ex: bert-base-cased)",
         )
         self.add_argument("--tokenizer", type=str, help="Tokenizer's id or path (ex: bert-base-cased)")
         self.add_argument(
-            "--framework", type=str, choices=["pt", "tf"], help="Framework for loading the model",
+            "--framework",
+            type=str,
+            choices=["pt", "tf"],
+            help="Framework for loading the model",
         )
         self.add_argument("--opset", type=int, default=11, help="ONNX opset to use")
         self.add_argument(
-            "--check-loading", action="store_true", help="Check ONNX is able to load the model",
+            "--check-loading",
+            action="store_true",
+            help="Check ONNX is able to load the model",
         )
         self.add_argument(
-            "--use-external-format", action="store_true", help="Allow exporting model >= than 2Gb",
+            "--use-external-format",
+            action="store_true",
+            help="Allow exporting model >= than 2Gb",
         )
         self.add_argument(
-            "--quantize", action="store_true", help="Quantize the neural network to be run with int8",
+            "--quantize",
+            action="store_true",
+            help="Quantize the neural network to be run with int8",
         )
         self.add_argument("output")
@@ -376,7 +391,10 @@ def quantize(onnx_model_path: Path) -> Path:
     )
     quantized_model = quantize(
-        model=onnx_model, quantization_mode=QuantizationMode.IntegerOps, force_fusions=True, symmetric_weight=True,
+        model=onnx_model,
+        quantization_mode=QuantizationMode.IntegerOps,
+        force_fusions=True,
+        symmetric_weight=True,
     )
     # Append "-quantized" at the end of the model's name
...
@@ -255,7 +255,11 @@ license: apache-2.0
 def write_model_card(
-    hf_model_name: str, repo_root="OPUS-MT-train", save_dir=Path("marian_converted"), dry_run=False, extra_metadata={},
+    hf_model_name: str,
+    repo_root="OPUS-MT-train",
+    save_dir=Path("marian_converted"),
+    dry_run=False,
+    extra_metadata={},
 ) -> str:
     """Copy the most recent model's readme section from opus, and add metadata.
     upload command: aws s3 sync model_card_dir s3://models.huggingface.co/bert/Helsinki-NLP/ --dryrun
@@ -604,7 +608,9 @@ class OpusState:
         assert "hidden_size" not in cfg.to_dict()
         load_layers_(
-            model.model.encoder.layers, state_dict, BART_CONVERTER,
+            model.model.encoder.layers,
+            state_dict,
+            BART_CONVERTER,
         )
         load_layers_(model.model.decoder.layers, state_dict, BART_CONVERTER, is_decoder=True)
...
@@ -108,7 +108,12 @@ if is_torch_available():
 logging.set_verbosity_info()

 MODEL_CLASSES = {
-    "bert": (BertConfig, TFBertForPreTraining, BertForPreTraining, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+    "bert": (
+        BertConfig,
+        TFBertForPreTraining,
+        BertForPreTraining,
+        BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
     "bert-large-uncased-whole-word-masking-finetuned-squad": (
         BertConfig,
         TFBertForQuestionAnswering,
@@ -127,9 +132,24 @@ MODEL_CLASSES = {
         BertForSequenceClassification,
         BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
     ),
-    "gpt2": (GPT2Config, TFGPT2LMHeadModel, GPT2LMHeadModel, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-    "xlnet": (XLNetConfig, TFXLNetLMHeadModel, XLNetLMHeadModel, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-    "xlm": (XLMConfig, TFXLMWithLMHeadModel, XLMWithLMHeadModel, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+    "gpt2": (
+        GPT2Config,
+        TFGPT2LMHeadModel,
+        GPT2LMHeadModel,
+        GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
+    "xlnet": (
+        XLNetConfig,
+        TFXLNetLMHeadModel,
+        XLNetLMHeadModel,
+        XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
+    "xlm": (
+        XLMConfig,
+        TFXLMWithLMHeadModel,
+        XLMWithLMHeadModel,
+        XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
     "xlm-roberta": (
         XLMRobertaConfig,
         TFXLMRobertaForMaskedLM,
@@ -148,7 +168,12 @@ MODEL_CLASSES = {
         OpenAIGPTLMHeadModel,
         OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
     ),
-    "roberta": (RobertaConfig, TFRobertaForMaskedLM, RobertaForMaskedLM, ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+    "roberta": (
+        RobertaConfig,
+        TFRobertaForMaskedLM,
+        RobertaForMaskedLM,
+        ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
     "roberta-large-mnli": (
         RobertaConfig,
         TFRobertaForSequenceClassification,
@@ -179,10 +204,30 @@ MODEL_CLASSES = {
         DistilBertForQuestionAnswering,
         DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
     ),
-    "ctrl": (CTRLConfig, TFCTRLLMHeadModel, CTRLLMHeadModel, CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-    "albert": (AlbertConfig, TFAlbertForPreTraining, AlbertForPreTraining, ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-    "t5": (T5Config, TFT5ForConditionalGeneration, T5ForConditionalGeneration, T5_PRETRAINED_CONFIG_ARCHIVE_MAP,),
-    "electra": (ElectraConfig, TFElectraForPreTraining, ElectraForPreTraining, ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+    "ctrl": (
+        CTRLConfig,
+        TFCTRLLMHeadModel,
+        CTRLLMHeadModel,
+        CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
+    "albert": (
+        AlbertConfig,
+        TFAlbertForPreTraining,
+        AlbertForPreTraining,
+        ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
+    "t5": (
+        T5Config,
+        TFT5ForConditionalGeneration,
+        T5ForConditionalGeneration,
+        T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
+    "electra": (
+        ElectraConfig,
+        TFElectraForPreTraining,
+        ElectraForPreTraining,
+        ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,
+    ),
 }
...
@@ -49,10 +49,12 @@ def set_layer_weights_in_torch_lsh(weights, torch_layer, hidden_size):
         torch.tensor(np_query_key).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.self_attention.value, torch.tensor(np_value).transpose(1, 2).contiguous().view(-1, hidden_size),
+        torch_layer.self_attention.value,
+        torch.tensor(np_value).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.output.dense, torch.tensor(np_dense).view(-1, hidden_size).contiguous().transpose(0, 1),
+        torch_layer.output.dense,
+        torch.tensor(np_dense).view(-1, hidden_size).contiguous().transpose(0, 1),
     )
@@ -64,16 +66,20 @@ def set_layer_weights_in_torch_local(weights, torch_layer, hidden_size):
     np_dense = np.asarray(weights[3])
     set_param(
-        torch_layer.self_attention.query, torch.tensor(np_query).transpose(1, 2).contiguous().view(-1, hidden_size),
+        torch_layer.self_attention.query,
+        torch.tensor(np_query).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.self_attention.key, torch.tensor(np_key).transpose(1, 2).contiguous().view(-1, hidden_size),
+        torch_layer.self_attention.key,
+        torch.tensor(np_key).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.self_attention.value, torch.tensor(np_value).transpose(1, 2).contiguous().view(-1, hidden_size),
+        torch_layer.self_attention.value,
+        torch.tensor(np_value).transpose(1, 2).contiguous().view(-1, hidden_size),
     )
     set_param(
-        torch_layer.output.dense, torch.tensor(np_dense).view(-1, hidden_size).contiguous().transpose(0, 1),
+        torch_layer.output.dense,
+        torch.tensor(np_dense).view(-1, hidden_size).contiguous().transpose(0, 1),
     )
@@ -83,7 +89,9 @@ def set_block_weights_in_torch(weights, torch_block, hidden_size):
     layer_norm_1_weight = np.asarray(layer_norm_1[0])
     layer_norm_1_bias = np.asarray(layer_norm_1[1])
     set_param(
-        torch_block.attention.layer_norm, torch.tensor(layer_norm_1_weight), torch.tensor(layer_norm_1_bias),
+        torch_block.attention.layer_norm,
+        torch.tensor(layer_norm_1_weight),
+        torch.tensor(layer_norm_1_bias),
     )
     # lsh weights + output
@@ -104,7 +112,9 @@ def set_block_weights_in_torch(weights, torch_block, hidden_size):
     layer_norm_2_weight = np.asarray(intermediate_weights[0][0])
     layer_norm_2_bias = np.asarray(intermediate_weights[0][1])
     set_param(
-        torch_block.feed_forward.layer_norm, torch.tensor(layer_norm_2_weight), torch.tensor(layer_norm_2_bias),
+        torch_block.feed_forward.layer_norm,
+        torch.tensor(layer_norm_2_weight),
+        torch.tensor(layer_norm_2_bias),
     )
     # intermediate dense
@@ -133,7 +143,8 @@ def set_model_weights_in_torch(weights, torch_model, hidden_size):
     # word embeds
     word_embeddings = np.asarray(weights[1])
     set_param(
-        torch_model_reformer.embeddings.word_embeddings, torch.tensor(word_embeddings),
+        torch_model_reformer.embeddings.word_embeddings,
+        torch.tensor(word_embeddings),
     )
     if isinstance(weights[3], tuple):
...
@@ -86,7 +86,10 @@ class GlueDataset(Dataset):
         cached_features_file = os.path.join(
             cache_dir if cache_dir is not None else args.data_dir,
             "cached_{}_{}_{}_{}".format(
-                mode.value, tokenizer.__class__.__name__, str(args.max_seq_length), args.task_name,
+                mode.value,
+                tokenizer.__class__.__name__,
+                str(args.max_seq_length),
+                args.task_name,
             ),
         )
         label_list = self.processor.get_labels()
...
@@ -21,7 +21,11 @@ class TextDataset(Dataset):
     """

     def __init__(
-        self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, overwrite_cache=False,
+        self,
+        tokenizer: PreTrainedTokenizer,
+        file_path: str,
+        block_size: int,
+        overwrite_cache=False,
     ):
         assert os.path.isfile(file_path), f"Input file path {file_path} not found"
@@ -29,7 +33,12 @@ class TextDataset(Dataset):
         directory, filename = os.path.split(file_path)
         cached_features_file = os.path.join(
-            directory, "cached_lm_{}_{}_{}".format(tokenizer.__class__.__name__, str(block_size), filename,),
+            directory,
+            "cached_lm_{}_{}_{}".format(
+                tokenizer.__class__.__name__,
+                str(block_size),
+                filename,
+            ),
         )
         # Make sure only the first process in distributed training processes the dataset,
...
@@ -119,7 +119,10 @@ class SquadDataset(Dataset):
         cached_features_file = os.path.join(
             cache_dir if cache_dir is not None else args.data_dir,
             "cached_{}_{}_{}_{}".format(
-                mode.value, tokenizer.__class__.__name__, str(args.max_seq_length), version_tag,
+                mode.value,
+                tokenizer.__class__.__name__,
+                str(args.max_seq_length),
+                version_tag,
             ),
         )
...
@@ -589,7 +589,7 @@ def compute_predictions_log_probs(
     tokenizer,
     verbose_logging,
 ):
-    """ XLNet write prediction logic (more complex than Bert's).
+    """XLNet write prediction logic (more complex than Bert's).
     Write final predictions to the json file and log-odds of null if needed.
     Requires utils_squad_evaluate.py
...
@@ -69,7 +69,10 @@ def glue_convert_examples_to_features(
 if is_tf_available():

     def _tf_glue_convert_examples_to_features(
-        examples: tf.data.Dataset, tokenizer: PreTrainedTokenizer, task=str, max_length: Optional[int] = None,
+        examples: tf.data.Dataset,
+        tokenizer: PreTrainedTokenizer,
+        task=str,
+        max_length: Optional[int] = None,
     ) -> tf.data.Dataset:
         """
         Returns:
...
@@ -269,7 +269,9 @@ class SingleSentenceClassificationProcessor(DataProcessor):
                 logger.info("Tokenizing example %d", ex_index)

             input_ids = tokenizer.encode(
-                example.text_a, add_special_tokens=True, max_length=min(max_length, tokenizer.max_len),
+                example.text_a,
+                add_special_tokens=True,
+                max_length=min(max_length, tokenizer.max_len),
             )
             all_input_ids.append(input_ids)
...